diff --git a/pyproject.toml b/pyproject.toml
index 97fb917..3726d91 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -112,6 +112,14 @@ dev = [
     "ruff>=0.4.0",
     "pyrefly==0.61.1",
 ]
+# Spike for issue #69 Option B (parser-replacement bake-off).
+# Three CommonMark/GFM libraries evaluated as candidates for replacing the
+# hand-rolled `shared/markdown_parser.py`. Not a runtime dep -- spike only.
+spike-parser = [
+    "mistune>=3.0",
+    "markdown-it-py>=3.0",
+    "marko>=2.0",
+]
 
 # ---------------------------------------------------------------------------
 # Pyrefly type checker configuration
diff --git a/scripts/parser_spike/__init__.py b/scripts/parser_spike/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/scripts/parser_spike/benchmark.py b/scripts/parser_spike/benchmark.py
new file mode 100644
index 0000000..0596730
--- /dev/null
+++ b/scripts/parser_spike/benchmark.py
@@ -0,0 +1,136 @@
+"""Parser-replacement spike benchmark.
+
+Measures parse-and-translate time for the four parsers (baseline +
+three candidates) across a synthetic corpus scaled to at least ~10KB.
+Reports median, p95, min, and max timings plus per-translator LOC so
+the Option B decision has hard numbers to weigh.
+
+Run::
+
+    uv run python scripts/parser_spike/benchmark.py
+
+Acceptance criteria (per issue #69 follow-up):
+- 10KB mixed-content document under 5ms median on CI hardware.
+- Translator LOC under 250 per library.
+"""
+
+from __future__ import annotations
+
+import statistics
+import time
+from pathlib import Path
+
+from chat_sdk.shared.markdown_parser import parse_markdown as baseline_parse
+from chat_sdk.shared.parser_spike.markdown_it_translator import (
+    parse_markdown as markdown_it_parse,
+)
+from chat_sdk.shared.parser_spike.marko_translator import parse_markdown as marko_parse
+from chat_sdk.shared.parser_spike.mistune_translator import parse_markdown as mistune_parse
+
+FIXTURE_PATH = Path(__file__).resolve().parents[2] / "tests" / "parser_spike" / "fixtures" / "mixed_content.md"
+
+
+def _build_corpus(target_bytes: int = 10_240) -> str:
+    base = FIXTURE_PATH.read_text(encoding="utf-8")
+    out = []
+    size = 0
+    while size < target_bytes:
+        out.append(base)
+        size += len(base.encode("utf-8"))
+    return "\n".join(out)
+
+
+def _time_one(fn, text: str, iterations: int) -> list[float]:
+    timings = []
+    for _ in range(iterations):
+        t0 = time.perf_counter()
+        fn(text)
+        timings.append((time.perf_counter() - t0) * 1000.0)
+    return timings
+
+
+def _translator_loc() -> dict[str, int]:
+    """Count lines of code per translator, excluding blanks, line comments,
+    and docstrings.
+
+    The docstring exclusion uses ``ast`` to identify ``Expr(Constant(str))``
+    statements -- the canonical docstring shape -- so we don't over-count
+    multi-line docstrings as logic LOC against the 250-LOC budget.
+    """
+    import ast
+
+    root = Path(__file__).resolve().parents[2] / "src" / "chat_sdk" / "shared" / "parser_spike"
+    out = {}
+    for name, path in [
+        ("mistune", root / "mistune_translator.py"),
+        ("markdown-it-py", root / "markdown_it_translator.py"),
+        ("marko", root / "marko_translator.py"),
+    ]:
+        text = path.read_text(encoding="utf-8")
+        # Identify docstring line ranges via AST: any Expr(Constant(str))
+        # immediately under a module, class, or function definition.
+        tree = ast.parse(text)
+        docstring_lines: set[int] = set()
+        for node in ast.walk(tree):
+            if not isinstance(node, ast.Module | ast.ClassDef | ast.FunctionDef | ast.AsyncFunctionDef):
+                continue
+            body = getattr(node, "body", None)
+            if not body:
+                continue
+            first = body[0]
+            if (
+                isinstance(first, ast.Expr)
+                and isinstance(first.value, ast.Constant)
+                and isinstance(first.value.value, str)
+            ):
+                end_lineno = first.end_lineno or first.lineno
+                docstring_lines.update(range(first.lineno, end_lineno + 1))
+
+        code_lines = 0
+        for lineno, line in enumerate(text.splitlines(), start=1):
+            stripped = line.strip()
+            if not stripped or stripped.startswith("#"):
+                continue
+            if lineno in docstring_lines:
+                continue
+            code_lines += 1
+        out[name] = code_lines
+    return out
+
+
+def main() -> None:
+    corpus = _build_corpus()
+    actual_bytes = len(corpus.encode("utf-8"))
+    print(f"Corpus: {actual_bytes:,} bytes (~{actual_bytes / 1024:.1f} KB)")
+
+    # Warm-up: each parser caches some regexes / token-rule chains.
+    for fn in (baseline_parse, mistune_parse, markdown_it_parse, marko_parse):
+        for _ in range(3):
+            fn(corpus)
+
+    iterations = 50
+    print(f"Iterations per parser: {iterations}\n")
+
+    print(f"{'parser':<20} {'median (ms)':>12} {'p95 (ms)':>12} {'min (ms)':>12} {'max (ms)':>12}")
+    print("-" * 70)
+    for name, fn in [
+        ("baseline (hand)", baseline_parse),
+        ("mistune", mistune_parse),
+        ("markdown-it-py", markdown_it_parse),
+        ("marko", marko_parse),
+    ]:
+        timings = _time_one(fn, corpus, iterations)
+        timings.sort()
+        median = statistics.median(timings)
+        p95 = timings[int(len(timings) * 0.95)]
+        print(f"{name:<20} {median:>12.2f} {p95:>12.2f} {min(timings):>12.2f} {max(timings):>12.2f}")
+
+    print("\nTranslator LOC (excluding blank lines, line comments, and docstrings):")
+    print("-" * 70)
+    for name, loc in _translator_loc().items():
+        budget_marker = " ✓" if loc < 250 else " ✗ (over 250-LOC budget)"
+        print(f"  {name:<20} {loc:>4} lines{budget_marker}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/chat_sdk/shared/parser_spike/README.md b/src/chat_sdk/shared/parser_spike/README.md
new file mode 100644
index 0000000..12a5a97
--- /dev/null
+++ b/src/chat_sdk/shared/parser_spike/README.md
@@ -0,0 +1,263 @@
+# Parser-replacement spike (issue #69 Option B)
+
+This directory is **not part of the runtime SDK**. It exists so the
+three candidate markdown libraries can be benchmarked and diffed
+against the existing hand-rolled `shared/markdown_parser.py` in a
+controlled way before any production code is touched.
+
+## How to run
+
+```bash
+# Install spike dev deps (one-off)
+uv sync --group dev --group spike-parser
+
+# Diff candidate mdast trees against the baseline
+uv run pytest tests/parser_spike/test_mdast_parity.py -s
+
+# Run the benchmark + LOC report
+uv run python scripts/parser_spike/benchmark.py
+```
+
+## Current results (sample run, local machine)
+
+Numbers will vary on CI hardware but the **relative ordering is stable**
+across runs.
+
+### Parse-and-translate time (12KB mixed corpus, 50 iterations)
+
+| parser            | median  | p95     | meets 5ms budget? |
+|-------------------|--------:|--------:|-------------------|
+| baseline (hand)   |  2.59ms |  2.72ms | ✓                 |
+| mistune           | 11.94ms | 13.04ms | ✗ (2.4× over)     |
+| markdown-it-py    | 13.36ms | 20.64ms | ✗ (2.7× over)     |
+| marko             | 46.62ms | 49.58ms | ✗ (9.3× over)     |
+
+The baseline is **~5× faster** than mistune and markdown-it-py and
+**~18× faster** than marko. The 5ms acceptance criterion from issue #69
+is met by the baseline alone.
+
+### Translator LOC (excluding blank lines, line comments, and docstrings)
+
+| library         | LOC | 250-LOC budget |
+|-----------------|----:|----------------|
+| mistune         | 149 | ✓               |
+| markdown-it-py  | 194 | ✓               |
+| marko           | 147 | ✓               |
+
+All three fit comfortably. mistune and marko both come in under 150
+lines of logic for the translator layer.
+
+### mdast fidelity on the happy path (`mixed_content.md`)
+
+Tested against a ≈3KB corpus of headings, tables, code blocks, lists,
+links, images, blockquotes, emphasis — constructs the baseline parser
+*does* handle.
+
+| library         | divergences |
+|-----------------|------------:|
+| mistune         | 26          |
+| markdown-it-py  | 24          |
+| marko           | 27          |
+
+**Important caveat**: of the ~25 divergences each candidate has, the
+vast majority are cases where the **baseline diverges from the mdast
+spec**, not where the candidate does. The most common patterns:
+
+- **Soft line breaks inside paragraphs / blockquotes**: candidates
+  emit `text + text("\n") + text` (per mdast spec); baseline merges
+  them into a single text node.
+- **Inline link followed by text**: candidates emit
+  `link(...) + text(".")`; baseline emits a single trailing text node
+  for `link(...).` that drops the URL.
+- **Trailing newline in fenced code values**: mistune and marko
+  preserve the trailing `\n`; baseline strips it.
+
+These are **structural improvements**, not regressions. Adopting any
+of the candidates would also fix several baseline correctness bugs as
+a side effect — albeit changing the mdast shape that downstream code
+currently depends on.
+
+The one candidate-side bug surfaced was marko losing GFM table
+alignment metadata (a translator fix; not investigated further in the
+spike).
+
+### Completeness gap on hard constructs (`gap_cases.md`)
+
+The happy-path comparison above is **not the whole picture**: the
+baseline parser is documented as not handling several CommonMark / GFM
+constructs at all (see `docs/UPSTREAM_SYNC.md:442`). On those
+constructs it silently flattens to `text` / `paragraph` nodes — the
+same surface area issue #69 was opened to address.
+
+`fixtures/gap_cases.md` exercises six gap constructs. **Silent drop**
+means the construct was parsed as ordinary text/paragraph; **recognised**
+means the parser emitted the correct mdast node type.
+
+| construct             | baseline    | mistune    | markdown-it-py | marko       |
+|-----------------------|-------------|------------|----------------|-------------|
+| setext heading        | silent drop | recognised | recognised     | recognised  |
+| indented code block   | silent drop | recognised | recognised     | recognised  |
+| task list item        | recognised¹ | silent drop| recognised     | recognised  |
+| footnote definition   | silent drop | silent drop| silent drop²   | silent drop |
+| inline HTML           | silent drop | silent drop| silent drop    | silent drop |
+| definition list       | silent drop | silent drop| silent drop    | silent drop |
+| **silent-drop count** | **5**       | **4**      | **3**          | **3**       |
+
+¹ Baseline matches `- [x]` as a list item but doesn't extract the
+checkbox state.
+² markdown-it-py supports footnotes via the `mdit-py-plugins` package
+(not pulled in by the spike); enabling it would drop the silent-drop
+count to 2.
+
+**The baseline is strictly worse on completeness than every
+candidate.** That's the half of the perf comparison the happy-path
+numbers don't show: baseline runs faster partly because it does less
+work per byte — setext headings, indented code, multi-backtick spans,
+escaped chars, and raw HTML all skip straight through the inline
+fast-paths instead of being parsed.
+
+## Implication for the Option A/B/C decision
+
+The spike data argues against a clean recommendation in either
+direction:
+
+1. **Performance**: baseline wins at 2.59ms median vs 11-47ms for the
+   candidates. But that win is at least partly a function of doing
+   *less work per byte*: the baseline skips entire construct families
+   on the fast path, while the libraries fully tokenise them. Apples
+   to apples requires either teaching the baseline to handle setext +
+   indented code + escaped chars (Option A) and re-measuring, or
+   accepting that the perf gap pays for genuine completeness.
+
+2. **mdast fidelity on the happy path**: all three candidates are
+   roughly equivalent (24-27 minor divergences, mostly toward greater
+   spec compliance) and each closes some baseline correctness bugs.
+
+3. **Completeness on hard constructs**: the baseline is strictly
+   worse than every candidate. It silently flattens setext, indented
+   code, multi-backtick spans, escaped chars, raw HTML, and definition
+   lists into plain text — the exact gap list issue #69 enumerated.
+
+4. **Translator LOC**: all under the 250-line budget.
+
+### Three options now, not two
+
+- **Option A (close baseline gaps in-tree)**: write parser code for
+  setext, indented code, escaped chars, multi-backtick spans (the
+  ones #69 listed as common in LLM output). Estimated ~300-400 LOC of
+  carefully-tested regex / state-machine work, plus the existing
+  parser keeps its 2.6ms perf. Doesn't address `_remend` gaps from the
+  issue #69 follow-up comment.
+
+- **Option B (library swap)**: pay the 5× perf hit (10-15ms median)
+  for `mistune` or `markdown-it-py`, eat ~150-195 LOC of translator,
+  close the completeness gap *and* most `_remend` gaps in one motion.
+  **markdown-it-py is now the preferred candidate** (best
+  completeness score, only 1.5ms slower than mistune), with
+  `mdit-py-plugins` available for footnotes if needed later. mistune
+  is the runner-up. marko drops out on performance.
+
+- **Option D (split the problem)**: keep the fast hand-rolled parser
+  *and* close gaps in-tree (Option A), but separately port upstream
+  `remend` directly for the streaming side. Two efforts, two PRs, but
+  preserves perf while closing both bug classes. More total work than
+  Option B but no dependency added.
+
+### Recommendation
+
+The right answer depends on team priorities the spike can't answer:
+
+- **If 10ms median parse time is fine** (likely true for chat
+  streaming, where LLM token latency dwarfs this), **Option B with
+  markdown-it-py is the cleanest path**. One PR, one dep, both gap
+  lists close.
+- **If we want zero-dep core preserved**, **Option D** is the only
+  path that keeps the install footprint small while closing both bug
+  classes. Highest total effort.
+- **If neither perf nor zero-dep is sacred**, Option B still wins on
+  effort per fix delivered.
+
+Option C (selective parser-side fixes only, the original framing in
+the issue) leaves the streaming-side bugs from the #69 follow-up
+comment unaddressed and should be ruled out unless we ship it
+alongside a separate `_remend` fix.
+
+## Triggers to revisit this decision
+
+The chat-scoped Option A (PRs #99 + #101) is the right call **for the
+SDK's current scope** -- LLM output rendered into chat platforms. The
+moment the input source or rendering target changes, the spike data
+should be re-run with a workload-shaped fixture before deciding
+anything.
+
+Concrete triggers that should cause us to re-open this:
+
+- **A non-chat input surface lands.** The chat-scoped assumption is
+  "input comes from an LLM; humans don't write the markdown we parse."
+  That breaks the moment we start parsing markdown that humans (or
+  external corpora) authored:
+  - User-authored memory / notes / scratchpads stored in the SDK
+  - Ingestion of `*.md` files for RAG-style workflows
+  - Parsing incoming GitHub PR/issue bodies for structure extraction
+    (today the GitHub adapter mostly emits, not parses)
+  - Any "import markdown" public API
+  Human-authored content routinely uses setext, indented code,
+  footnotes, raw HTML, and multi-backtick spans -- exactly the gaps
+  the baseline silently drops.
+
+- **A long-form artifact output surface lands.** When agents start
+  emitting research-summary / report / document artifacts (not chat
+  messages), the workload shifts toward CommonMark fidelity:
+  - Footnotes for citations
+  - Math regions rendered (not just sanitised)
+  - Multi-backtick code spans for technical documentation
+  - Tables with richer cell content
+  Parsing for an artifact also happens once per document, not per
+  stream chunk -- which makes the 5-18× perf cost of Option B much
+  more tolerable than it is for streaming.
+
+- **A web rendering surface for chat-sdk-python.** Upstream added
+  `@chat-adapter/web` in v4.27.0 (a browser-side chat UI). It's
+  explicitly out of scope for chat-sdk-python today (see PR #83 sync
+  scope). If that ever ships in Python, the rendering target tolerates
+  richer markdown because the browser can display setext / footnotes /
+  HTML natively.
+
+- **A new chat platform that demands richer parsing.** Unlikely in
+  the near term -- the existing eight platforms all render a similar
+  CommonMark subset. But e.g. a platform with native footnote support
+  could surface a gap.
+
+### Upstream check (May 2026)
+
+Spot-checked `vercel/chat`'s `packages/` directory at the time of
+writing. The only relevant package besides the eight chat adapters and
+the core/state packages is **`adapter-web`** (added in v4.27.0, Python
+port deferred). No artifact-rendering, RAG, document-ingestion, or
+standalone markdown-rendering packages exist upstream. The triggers
+above are forward-looking -- none are imminent in upstream-tracked
+work.
+
+### Playbook for re-running
+
+When a trigger materialises:
+
+1. Author a fixture file under `tests/parser_spike/fixtures/` that
+   represents the new surface's actual content (not generic
+   CommonMark -- workload-shaped).
+2. Re-run `pytest tests/parser_spike/test_mdast_parity.py -s` (register
+   the fixture in `conftest.py`) and `python scripts/parser_spike/benchmark.py`
+   (point its hard-coded `FIXTURE_PATH` at the new fixture first).
+3. Compare the silent-drop count and benchmark numbers against the
+   chat-scoped findings above. The decision matrix shifts toward
+   Option B when:
+   - Silent-drop count is materially higher on the new fixture
+     (≥6 constructs that the new surface needs)
+   - Parse latency is one-shot rather than per-stream-chunk
+   - The team is OK adding a dependency to the runtime core
+4. If thresholds are met, promote `markdown-it-py` translator from
+   `parser_spike/` into runtime (it's the preferred candidate per
+   the spike data). Add `markdown-it-py` to the relevant extras
+   group (not `dependencies`, to preserve zero-dep core install for
+   chat-only consumers).
+
diff --git a/src/chat_sdk/shared/parser_spike/__init__.py b/src/chat_sdk/shared/parser_spike/__init__.py
new file mode 100644
index 0000000..598ec03
--- /dev/null
+++ b/src/chat_sdk/shared/parser_spike/__init__.py
@@ -0,0 +1,31 @@
+"""Parser-replacement spike for issue #69 Option B.
+
+Three candidate libraries are evaluated as drop-in replacements for the
+hand-rolled ``shared/markdown_parser.py``:
+
+- ``mistune`` (3.x)
+- ``markdown-it-py`` (4.x)
+- ``marko`` (2.x)
+
+Each gets a thin translator that converts the library's native token /
+AST format into the mdast-compatible dict shape produced by
+``shared.markdown_parser.parse_markdown``. The contract: same input
+markdown should produce the same mdast tree across all four parsers
+(the existing hand-rolled one + the three candidates), modulo
+documented divergences.
+
+This module is NOT imported by the runtime SDK. It exists purely so
+the bake-off harness in ``tests/parser_spike/`` and
+``scripts/parser_spike/`` can exercise the candidates side-by-side
+without touching production code paths.
+
+The decision criteria (per the issue #69 follow-up plan):
+  1. mdast fidelity vs the existing parser on the fixture corpus
+  2. Translator LOC (target: <250 per library)
+  3. Parse-and-translate time (target: <5ms on 10KB mixed content)
+  4. GFM coverage (tables, strikethrough, task lists)
+  5. Extensibility surface for the gaps in #69 (setext, footnotes,
+     escaped chars, multi-backtick code spans, raw HTML, indented code)
+"""
+
+from __future__ import annotations
diff --git a/src/chat_sdk/shared/parser_spike/markdown_it_translator.py b/src/chat_sdk/shared/parser_spike/markdown_it_translator.py
new file mode 100644
index 0000000..ddf5705
--- /dev/null
+++ b/src/chat_sdk/shared/parser_spike/markdown_it_translator.py
@@ -0,0 +1,246 @@
+"""markdown-it-py (4.x) -> mdast translator.
+
+markdown-it tokenises into a flat list of ``Token`` objects (each with
+``type``, ``tag``, ``content``, ``children``, ``markup``, ``attrs``,
+``meta``). Block-level constructs use ``_open`` / ``_close`` pairs and
+must be folded into a tree. Inline tokens (under ``inline`` parents)
+are already nested.
+
+GFM features (tables, strikethrough) are enabled by starting from the
+``commonmark`` preset and switching both rules on explicitly.
+Task-list rendering would require ``mdit-py-plugins`` (deferred).
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+from markdown_it import MarkdownIt
+from markdown_it.token import Token
+
+from chat_sdk.shared.markdown_parser import (
+    Content,
+    Root,
+    make_blockquote,
+    make_break,
+    make_code,
+    make_delete,
+    make_emphasis,
+    make_heading,
+    make_image,
+    make_inline_code,
+    make_link,
+    make_list,
+    make_list_item,
+    make_paragraph,
+    make_root,
+    make_strong,
+    make_table,
+    make_table_cell,
+    make_table_row,
+    make_text,
+    make_thematic_break,
+)
+
+_MD = MarkdownIt("commonmark").enable(["table", "strikethrough"])
+
+
+def parse_markdown(text: str) -> Root:
+    tokens = _MD.parse(text)
+    children, _ = _consume_blocks(tokens, 0, end_type=None)
+    return make_root(children)
+
+
+def _consume_blocks(tokens: list[Token], i: int, end_type: str | None) -> tuple[list[Content], int]:
+    """Walk tokens until we hit *end_type* (or end of list). Return the
+    list of mdast block children produced and the index after the closer.
+    """
+    children: list[Content] = []
+    while i < len(tokens):
+        tok = tokens[i]
+        if end_type is not None and tok.type == end_type:
+            return children, i + 1
+
+        if tok.type == "paragraph_open":
+            inline = tokens[i + 1]
+            children.append(make_paragraph(_translate_inline(inline.children or [])))
+            i += 3  # paragraph_open, inline, paragraph_close
+            continue
+
+        if tok.type == "heading_open":
+            depth = int(tok.tag[1])  # h1 -> 1, h2 -> 2, ...
+            inline = tokens[i + 1]
+            children.append(make_heading(depth, _translate_inline(inline.children or [])))
+            i += 3
+            continue
+
+        if tok.type == "hr":
+            children.append(make_thematic_break())
+            i += 1
+            continue
+
+        if tok.type == "fence":
+            lang = tok.info.split()[0] if tok.info and tok.info.strip() else None
+            value = tok.content.rstrip("\n")
+            children.append(make_code(value, lang=lang))
+            i += 1
+            continue
+
+        if tok.type == "code_block":
+            children.append(make_code(tok.content.rstrip("\n"), lang=None))
+            i += 1
+            continue
+
+        if tok.type == "blockquote_open":
+            inner, i = _consume_blocks(tokens, i + 1, "blockquote_close")
+            children.append(make_blockquote(inner))
+            continue
+
+        if tok.type == "bullet_list_open":
+            items, i = _consume_list(tokens, i + 1, "bullet_list_close")
+            children.append(make_list(items, ordered=False))
+            continue
+
+        if tok.type == "ordered_list_open":
+            start = int((tok.attrs or {}).get("start", 1))
+            items, i = _consume_list(tokens, i + 1, "ordered_list_close")
+            children.append(make_list(items, ordered=True, start=start))
+            continue
+
+        if tok.type == "table_open":
+            table, i = _consume_table(tokens, i + 1)
+            children.append(table)
+            continue
+
+        # Unknown / unhandled token: skip but don't crash.
+        i += 1
+
+    return children, i
+
+
+def _consume_list(tokens: list[Token], i: int, end_type: str) -> tuple[list[Content], int]:
+    items: list[Content] = []
+    while i < len(tokens):
+        tok = tokens[i]
+        if tok.type == end_type:
+            return items, i + 1
+        if tok.type == "list_item_open":
+            inner, i = _consume_blocks(tokens, i + 1, "list_item_close")
+            items.append(make_list_item(inner))
+            continue
+        i += 1
+    return items, i
+
+
+def _consume_table(tokens: list[Token], i: int) -> tuple[Content, int]:
+    rows: list[Content] = []
+    in_header = False
+    header_aligns: list[str | None] = []
+    current_row: list[Content] = []
+    current_aligns: list[str | None] = []
+
+    while i < len(tokens):
+        tok = tokens[i]
+        if tok.type == "table_close":
+            return make_table(rows, align=header_aligns if any(header_aligns) else None), i + 1
+        if tok.type == "thead_open":
+            in_header = True
+        elif tok.type == "thead_close":
+            in_header = False
+        elif tok.type == "tr_open":
+            current_row = []
+            current_aligns = []
+        elif tok.type == "tr_close":
+            rows.append(make_table_row(current_row))
+            if in_header:
+                header_aligns = current_aligns
+        elif tok.type in ("th_open", "td_open"):
+            style = (tok.attrs or {}).get("style", "")
+            cell_align: str | None = None
+            if isinstance(style, str):
+                if "text-align:left" in style:
+                    cell_align = "left"
+                elif "text-align:center" in style:
+                    cell_align = "center"
+                elif "text-align:right" in style:
+                    cell_align = "right"
+            current_aligns.append(cell_align)
+            inline = tokens[i + 1]
+            current_row.append(make_table_cell(_translate_inline(inline.children or [])))
+            i += 3  # th/td_open, inline, th/td_close
+            continue
+        i += 1
+    return make_table(rows, align=header_aligns if any(header_aligns) else None), i
+
+
+def _translate_inline(tokens: list[Token]) -> list[Content]:
+    out: list[Content] = []
+    # Each stack frame holds (parent_list, meta) -- meta is None for plain
+    # containers (strong/emphasis/delete) and a (href, title) tuple for
+    # links. Using a tuple instead of pipe-stuffing a string sidesteps the
+    # fragility of URLs/titles that contain pipe characters.
+    stack: list[tuple[list[Content], tuple[str, str | None] | None]] = []
+    current = out
+
+    def open_container() -> None:
+        nonlocal current
+        new_children: list[Content] = []
+        stack.append((current, None))
+        current = new_children
+
+    def close_container(make: Any) -> None:
+        nonlocal current
+        kids = current
+        parent, _meta = stack.pop()
+        current = parent
+        current.append(make(kids))
+
+    for tok in tokens:
+        t = tok.type
+        if t == "text":
+            current.append(make_text(tok.content))
+        elif t == "softbreak":
+            current.append(make_text("\n"))
+        elif t == "hardbreak":
+            current.append(make_break())
+        elif t == "code_inline":
+            current.append(make_inline_code(tok.content))
+        elif t == "strong_open":
+            open_container()
+        elif t == "strong_close":
+            close_container(make_strong)
+        elif t == "em_open":
+            open_container()
+        elif t == "em_close":
+            close_container(make_emphasis)
+        elif t == "s_open":
+            open_container()
+        elif t == "s_close":
+            close_container(make_delete)
+        elif t == "link_open":
+            attrs = tok.attrs or {}
+            href = str(attrs.get("href", ""))
+            raw_title = attrs.get("title")
+            title: str | None = str(raw_title) if raw_title is not None else None
+            link_children: list[Content] = []
+            stack.append((current, (href, title)))
+            current = link_children
+        elif t == "link_close":
+            kids = current
+            parent, meta = stack.pop()
+            current = parent
+            href, title = meta if meta else ("", None)
+            current.append(make_link(href, kids, title=title or None))
+        elif t == "image":
+            attrs = tok.attrs or {}
+            url = attrs.get("src", "")
+            raw_title = attrs.get("title")
+            title = str(raw_title) if raw_title is not None else None
+            alt = tok.content  # markdown-it precomputes alt text
+            current.append(make_image(str(url), alt=alt, title=title))
+        elif t == "html_inline":
+            current.append(make_text(tok.content))
+        else:
+            if tok.content:
+                current.append(make_text(tok.content))
+    return out
diff --git a/src/chat_sdk/shared/parser_spike/marko_translator.py b/src/chat_sdk/shared/parser_spike/marko_translator.py
new file mode 100644
index 0000000..9bc9530
--- /dev/null
+++ b/src/chat_sdk/shared/parser_spike/marko_translator.py
@@ -0,0 +1,179 @@
+"""marko (2.x) -> mdast translator.
+
+marko parses to a class-based AST (``marko.block.Document`` etc.). Each
+node exposes ``children`` (list[Node] or str payload). The GFM extension
+adds tables, strikethrough, task lists, autolinks.
+"""
+
+from __future__ import annotations
+
+import marko
+from marko.ext.gfm import GFM
+
+from chat_sdk.shared.markdown_parser import (
+    Content,
+    Root,
+    make_blockquote,
+    make_break,
+    make_code,
+    make_delete,
+    make_emphasis,
+    make_heading,
+    make_image,
+    make_inline_code,
+    make_link,
+    make_list,
+    make_list_item,
+    make_paragraph,
+    make_root,
+    make_strong,
+    make_table,
+    make_table_cell,
+    make_table_row,
+    make_text,
+    make_thematic_break,
+)
+
+_MD = marko.Markdown(extensions=[GFM])
+
+
+def parse_markdown(text: str) -> Root:
+    doc = _MD.parse(text)
+    children = [_translate(c) for c in getattr(doc, "children", [])]
+    return make_root([c for c in children if c is not None])
+
+
+def _translate(node: object) -> Content | None:
+    cls = type(node).__name__
+
+    if cls == "Paragraph":
+        return make_paragraph(_inline_children(node))
+    if cls == "Heading":
+        depth = int(getattr(node, "level", 1))
+        return make_heading(depth, _inline_children(node))
+    if cls == "SetextHeading":
+        depth = int(getattr(node, "level", 1))
+        return make_heading(depth, _inline_children(node))
+    if cls == "ThematicBreak":
+        return make_thematic_break()
+    if cls in ("FencedCode", "CodeBlock"):
+        lang = getattr(node, "lang", None) or None
+        value = _gather_code_text(node)
+        return make_code(value, lang=lang)
+    if cls == "Quote":
+        return make_blockquote(_block_children(node))
+    if cls == "List":
+        ordered = bool(getattr(node, "ordered", False))
+        start = int(getattr(node, "start", 1)) if ordered else 1
+        return make_list(_block_children(node), ordered=ordered, start=start)
+    if cls == "ListItem":
+        return make_list_item(_block_children(node))
+    if cls == "Table":
+        return _translate_table(node)
+    if cls == "BlankLine":
+        return None
+    if cls == "HTMLBlock":
+        return make_paragraph([make_text(getattr(node, "body", "") or "")])
+    # Fallback: stringify if we can.
+    return None
+
+
+def _block_children(node: object) -> list[Content]:
+    out: list[Content] = []
+    for child in getattr(node, "children", []) or []:
+        translated = _translate(child)
+        if translated is not None:
+            out.append(translated)
+    return out
+
+
+def _inline_children(node: object) -> list[Content]:
+    out: list[Content] = []
+    children = getattr(node, "children", None)
+    if isinstance(children, str):
+        return [make_text(children)]
+    for child in children or []:
+        translated = _translate_inline(child)
+        if translated is not None:
+            out.extend(translated) if isinstance(translated, list) else out.append(translated)
+    return out
+
+
+def _translate_inline(node: object) -> Content | list[Content] | None:
+    cls = type(node).__name__
+
+    if cls == "RawText":
+        value = getattr(node, "children", "")
+        return make_text(value if isinstance(value, str) else "")
+    if cls == "Literal":
+        return make_text(getattr(node, "children", "") or "")
+    if cls == "LineBreak":
+        # marko exposes a ``soft`` flag on the line-break node.
+        soft = bool(getattr(node, "soft", False))
+        return make_text("\n") if soft else make_break()
+    if cls == "InlineHTML":
+        return make_text(getattr(node, "children", "") or "")
+    if cls == "CodeSpan":
+        value = getattr(node, "children", "")
+        return make_inline_code(value if isinstance(value, str) else "")
+    if cls == "Emphasis":
+        return make_emphasis(_inline_children(node))
+    if cls == "StrongEmphasis":
+        return make_strong(_inline_children(node))
+    if cls == "Strikethrough":
+        return make_delete(_inline_children(node))
+    if cls == "Link":
+        url = getattr(node, "dest", "") or ""
+        title = getattr(node, "title", None) or None
+        return make_link(url, _inline_children(node), title=title)
+    if cls in ("AutoLink", "Url"):
+        url = getattr(node, "dest", "") or ""
+        return make_link(url, _inline_children(node))
+    if cls == "Image":
+        url = getattr(node, "dest", "") or ""
+        title = getattr(node, "title", None) or None
+        alt = "".join(_extract_text(c) for c in getattr(node, "children", []) or [])
+        return make_image(url, alt=alt, title=title)
+    # Fallback: any unrecognized inline -> stringify children if any.
+    value = getattr(node, "children", None)
+    if isinstance(value, str):
+        return make_text(value)
+    return None
+
+
+def _translate_table(node: object) -> Content:
+    rows: list[Content] = []
+    align: list[str | None] = list(getattr(node, "alignment", []) or [])
+    # marko stores alignments as ["left", "center", "right", None].
+    align = [a if a in ("left", "center", "right") else None for a in align]
+    for row in getattr(node, "children", []) or []:
+        cells: list[Content] = []
+        for cell in getattr(row, "children", []) or []:
+            cells.append(make_table_cell(_inline_children(cell)))
+        rows.append(make_table_row(cells))
+    return make_table(rows, align=align if any(align) else None)
+
+
+def _extract_text(node: object) -> str:
+    cls = type(node).__name__
+    if cls == "RawText":
+        v = getattr(node, "children", "")
+        return v if isinstance(v, str) else ""
+    children = getattr(node, "children", None)
+    if isinstance(children, str):
+        return children
+    return "".join(_extract_text(c) for c in children or [])
+
+
+def _gather_code_text(node: object) -> str:
+    children = getattr(node, "children", None)
+    if isinstance(children, str):
+        return children
+    parts: list[str] = []
+    for c in children or []:
+        v = getattr(c, "children", "")
+        if isinstance(v, str):
+            parts.append(v)
+        else:
+            parts.append(_extract_text(c))
+    return "".join(parts)
diff --git a/src/chat_sdk/shared/parser_spike/mistune_translator.py b/src/chat_sdk/shared/parser_spike/mistune_translator.py
new file mode 100644
index 0000000..017f905
--- /dev/null
+++ b/src/chat_sdk/shared/parser_spike/mistune_translator.py
@@ -0,0 +1,204 @@
+"""mistune (3.x) -> mdast translator.
+
+Uses ``mistune.create_markdown(renderer=None)`` to obtain the parser's
+internal token list, then maps each token type to its mdast equivalent.
+
+GFM plugins enabled: ``table``, ``strikethrough``, ``task_lists``,
+``url``.
+
+Notes on the token shape (mistune 3.x): each token is a dict with
+``type`` (always), ``children`` (block tokens + some inline), ``raw``
+(text leaves), ``attrs`` (heading levels, link urls, list metadata).
+The inline parser is invoked lazily; we drive it explicitly via
+``md.inline.parse`` for cells / list items where needed.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+import mistune
+
+from chat_sdk.shared.markdown_parser import (
+    Content,
+    Root,
+    make_blockquote,
+    make_break,
+    make_code,
+    make_delete,
+    make_emphasis,
+    make_heading,
+    make_image,
+    make_inline_code,
+    make_link,
+    make_list,
+    make_list_item,
+    make_paragraph,
+    make_root,
+    make_strong,
+    make_table,
+    make_table_cell,
+    make_table_row,
+    make_text,
+    make_thematic_break,
+)
+
+# Single parser shared by every call. NOTE(review): assumes parses leave no state behind -- confirm.
+_MD = mistune.create_markdown(
+    renderer=None,  # no renderer: parse() hands back the token list that the translator walks
+    plugins=["table", "strikethrough", "task_lists", "url"],  # the GFM plugins this translator relies on
+)
+
+
+def parse_markdown(text: str) -> Root:
+    """Translate markdown *text* into an mdast-compatible root node via mistune.
+
+    Top-level tokens that ``_translate_block`` maps to ``None`` are omitted.
+    """
+    tokens, _ = _MD.parse(text)
+    nodes: list[Content] = []
+    for token in tokens:
+        if not isinstance(token, str):
+            node = _translate_block(token)
+            if node is not None:
+                nodes.append(node)
+        elif token:
+            # parse() is typed as ``list[dict | str]``: keep a bare, non-empty
+            # string token by lifting it into paragraph(text(...)).
+            nodes.append(make_paragraph([make_text(token)]))
+    return make_root(nodes)
+
+
+def _translate_block(tok: dict[str, Any]) -> Content | None:
+    """Translate one mistune block token into an mdast node.
+
+    Returns ``None`` for tokens that produce no node (blank lines, and unknown
+    tokens that carry no ``raw`` text).
+    """
+    t = tok.get("type")
+    attrs = tok.get("attrs") or {}  # tolerate a missing or ``None`` attrs entry
+    if t == "blank_line":
+        return None
+    if t in ("paragraph", "block_text"):
+        # block_text is the paragraph payload mistune emits inside list items.
+        return make_paragraph(_translate_inline_children(tok))
+    if t == "heading":
+        return make_heading(int(attrs.get("level", 1)), _translate_inline_children(tok))
+    if t == "thematic_break":
+        return make_thematic_break()
+    if t == "block_code":
+        info = attrs.get("info")
+        lang = info.split()[0] if isinstance(info, str) and info.strip() else None
+        return make_code(tok.get("raw", ""), lang=lang)
+    if t == "table":
+        return _translate_table(tok)
+    if t in ("block_quote", "list", "list_item", "task_list_item"):
+        kids = [k for k in map(_translate_block, tok.get("children") or []) if k is not None]
+        if t == "block_quote":
+            return make_blockquote(kids)
+        if t == "list":
+            ordered = bool(attrs.get("ordered"))
+            return make_list(kids, ordered=ordered, start=int(attrs.get("start", 1)) if ordered else 1)
+        # The task_lists plugin retypes "- [ ]" / "- [x]" items to task_list_item.
+        # Translate them like ordinary items so their content is not dropped
+        # (the ``checked`` flag in attrs is not forwarded).
+        return make_list_item(kids)
+    # Unknown block: render as a paragraph carrying its raw text so we
+    # don't silently drop content. The bake-off harness will flag this.
+    raw = tok.get("raw", "")
+    if raw:
+        return make_paragraph([make_text(raw)])
+    return None
+
+
+def _translate_table(tok: dict[str, Any]) -> Content:
+    """Build an mdast table; column alignment is taken from the ``table_head`` cells."""
+    rows: list[Content] = []
+    align: list[str | None] = []
+    for section in tok.get("children", []):
+        kind = section.get("type")
+        if kind == "table_head":
+            header_cells, align = _translate_table_row(section)
+            rows.append(make_table_row(header_cells))
+            continue
+        if kind != "table_body":
+            continue
+        body_rows = (r for r in section.get("children", []) if r.get("type") == "table_row")
+        rows.extend(make_table_row(_translate_table_row(r)[0]) for r in body_rows)
+    return make_table(rows, align=align if any(align) else None)
+
+
+def _translate_table_row(row: dict[str, Any]) -> tuple[list[Content], list[str | None]]:
+    """Return ``(cells, aligns)`` for the ``table_cell`` children of *row*.
+
+    ``aligns`` holds "left"/"center"/"right" per cell, or ``None`` for anything else.
+    """
+    cell_toks = [c for c in row.get("children", []) if c.get("type") == "table_cell"]
+    # An attrs entry that is missing or None is read as "no alignment".
+    declared = [(c.get("attrs") or {}).get("align") for c in cell_toks]
+    aligns: list[str | None] = [a if a in ("left", "center", "right") else None for a in declared]
+    cells: list[Content] = [make_table_cell(_translate_inline_children(c)) for c in cell_toks]
+    return cells, aligns
+
+
+def _translate_inline_children(tok: dict[str, Any]) -> list[Content]:
+    """Translate *tok*'s inline children (inline-parsing ``raw`` on demand) into mdast nodes."""
+    children = tok.get("children")
+    if children is None:
+        raw = tok.get("raw", "")
+        # InlineParser.parse() accepts only a prepared InlineState; calling the parser
+        # itself builds that state from (text, env) and returns the inline tokens.
+        children = _MD.inline(raw, {}) if raw else []
+    out: list[Content] = []
+    for translated in map(_translate_inline, children or []):
+        if isinstance(translated, list):
+            out.extend(translated)
+        elif translated is not None:
+            out.append(translated)
+    return out
+
+
+def _translate_inline(tok: dict[str, Any]) -> Content | list[Content] | None:
+    """Translate one mistune inline token into an mdast node.
+
+    Unrecognised tokens become text when they carry a non-empty ``raw``
+    value; otherwise ``None`` is returned.
+    """
+    kind = tok.get("type")
+    if kind in ("text", "inline_html"):
+        # mdast has html nodes, but the existing hand-rolled parser doesn't
+        # emit them, so inline HTML is surfaced as plain text for parity.
+        return make_text(tok.get("raw", ""))
+    if kind == "codespan":
+        return make_inline_code(tok.get("raw", ""))
+    if kind == "softbreak":
+        return make_text("\n")
+    if kind == "linebreak":
+        return make_break()
+    # Wrapper nodes share one shape: translate the children, then wrap them.
+    wrappers = {
+        "strong": make_strong,
+        "emphasis": make_emphasis,
+        "delete": make_delete,
+        "strikethrough": make_delete,
+    }
+    wrap = wrappers.get(kind) if isinstance(kind, str) else None
+    if wrap is not None:
+        return wrap(_translate_inline_children(tok))
+    if kind in ("link", "image"):
+        attrs = tok.get("attrs", {}) or {}
+        url, title = attrs.get("url", ""), attrs.get("title")
+        if kind == "link":
+            return make_link(url, _translate_inline_children(tok), title=title)
+        # mistune nests alt as inline children; flatten to plain string.
+        alt = "".join(_extract_text(c) for c in tok.get("children", []) or [])
+        return make_image(url, alt=alt, title=title)
+    fallback = tok.get("raw")
+    return make_text(fallback) if fallback else None
+
+
+def _extract_text(node: dict[str, Any]) -> str:
+    """Flatten *node* to plain text: the ``raw`` of text/codespan leaves, depth-first."""
+    if node.get("type") in ("text", "codespan"):
+        return node.get("raw") or ""
+    return "".join(_extract_text(c) for c in node.get("children") or [])
diff --git a/tests/parser_spike/__init__.py b/tests/parser_spike/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/parser_spike/conftest.py b/tests/parser_spike/conftest.py
new file mode 100644
index 0000000..05d54f0
--- /dev/null
+++ b/tests/parser_spike/conftest.py
@@ -0,0 +1,25 @@
+"""Shared fixtures for the parser-replacement spike harness."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+FIXTURE_DIR = Path(__file__).parent / "fixtures"
+
+
+@pytest.fixture(scope="session")
+def mixed_content_markdown() -> str:
+    return FIXTURE_DIR.joinpath("mixed_content.md").read_text(encoding="utf-8")  # read once per session
+
+
+@pytest.fixture(scope="session")
+def gap_cases_markdown() -> str:
+    """Text of ``gap_cases.md``: constructs the hand-rolled parser explicitly
+    doesn't support (setext headings, footnotes, escaped chars, multi-backtick
+    spans, raw HTML, indented code blocks, math, task lists, autolinks,
+    definition lists). It measures the *completeness* gap rather than the
+    structural-equivalence gap alone.
+    """
+    return FIXTURE_DIR.joinpath("gap_cases.md").read_text(encoding="utf-8")
diff --git a/tests/parser_spike/fixtures/gap_cases.md b/tests/parser_spike/fixtures/gap_cases.md
new file mode 100644
index 0000000..18678e6
--- /dev/null
+++ b/tests/parser_spike/fixtures/gap_cases.md
@@ -0,0 +1,54 @@
+Setext H1 underline
+===================
+
+Setext H2 underline
+-------------------
+
+Indented code block (4-space):
+
+    def hello():
+        return "world"
+
+A paragraph with escaped \*asterisks\* and escaped \[brackets\] and a
+literal backslash \\ in it.
+
+A footnote reference[^1] in running text.
+
+[^1]: This is the footnote body.
+
+Multi-backtick inline code: ``some `quoted` code`` and triple ```backticks
+with ``double`` inside```.
+
+Raw HTML block:
+
+<div class="callout">
+  <p>This is HTML, not markdown.</p>
+</div>
+
+Inline HTML: <span style="color:red">red text</span> and a self-closing
+<br/> mid-sentence.
+
+Word-internal asterisks: `5*3=15`, paths like `lib/*.so`, and
+glob*patterns*everywhere.
+
+Math: a single dollar $a^2 + b^2 = c^2$ and a display block:
+
+$$
+\int_0^\infty e^{-x^2} dx = \frac{\sqrt{\pi}}{2}
+$$
+
+A task list (GFM):
+
+- [ ] Pending item
+- [x] Completed item
+- [ ] Another pending one
+
+An autolink: <https://example.com> and an email <user@example.com>.
+
+A definition list (some flavors):
+
+term1
+:   definition for term 1
+
+term2
+:   definition for term 2
diff --git a/tests/parser_spike/fixtures/mixed_content.md b/tests/parser_spike/fixtures/mixed_content.md
new file mode 100644
index 0000000..9c41c3e
--- /dev/null
+++ b/tests/parser_spike/fixtures/mixed_content.md
@@ -0,0 +1,77 @@
+# Quarterly Report
+
+A short **overview** with some _emphasis_, ~~deletions~~, and `inline code`
+followed by a [link to the docs](https://example.com "Docs").
+
+## Section 1: Numbers
+
+Total revenue grew by **12.4%** quarter over quarter. Here are the splits:
+
+| Region        | Q1     | Q2     | Q3     |
+|:--------------|-------:|-------:|-------:|
+| North America | $12.3M | $14.1M | $15.8M |
+| EMEA          | $8.7M  | $9.2M  | $10.4M |
+| APAC          | $5.2M  | $5.9M  | $6.7M  |
+
+> Growth is driven by **enterprise** adoption in EMEA and renewed
+> demand in the APAC mid-market segment.
+
+## Section 2: Engineering
+
+The platform team shipped:
+
+- Streaming markdown renderer (issue #69)
+- Multi-region failover for the lock service
+- A new `ConcurrencyConfig.max_concurrent` enforcement
+  - sub-bullet: documented in the migration guide
+  - sub-bullet: covered by 47 new tests
+- Telemetry pipeline rewrite
+
+Ordered roadmap items:
+
+1. Finalize the parser swap (Option B)
+2. Land the test-fidelity baseline at 100%
+3. Ship 0.5.0 with the new defaults
+
+### Code samples
+
+```python
+from chat_sdk import Chat
+
+chat = Chat(adapter=SlackAdapter())
+
+@chat.on_mention
+async def handle(event):
+    await event.thread.post("hello")
+```
+
+```bash
+$ uv run pytest tests/ -q
+```
+
+### Edge cases worth checking
+
+- A `**bold` opened but not closed inside a sentence.
+- An italic `*partial` that should be repaired during streaming.
+- Word-internal asterisks like `5*3=15` (must not be italic).
+- A bullet item: `* this is the start of a list` is a marker, not italic.
+
+---
+
+## Section 3: References
+
+For background:
+
+- [Vercel chat SDK](https://github.com/vercel/chat)
+- [mdast specification](https://github.com/syntax-tree/mdast)
+- [remend npm package](https://www.npmjs.com/package/remend)
+
+An image for visual identity: ![logo](https://example.com/logo.png "Logo")
+
+A nested blockquote with rich formatting:
+
+> The *quick* brown **fox** jumps over the `lazy` dog.
+>
+> > Inside a nested quote with a [link](https://example.com).
+
+End of report.
diff --git a/tests/parser_spike/test_mdast_parity.py b/tests/parser_spike/test_mdast_parity.py
new file mode 100644
index 0000000..df78d7e
--- /dev/null
+++ b/tests/parser_spike/test_mdast_parity.py
@@ -0,0 +1,232 @@
+"""mdast parity bake-off: hand-rolled parser vs library candidates.
+
+For each library candidate, parse the fixture corpus and diff the
+resulting mdast tree against the baseline hand-rolled parser. The
+test does not fail on divergence -- this is a measurement harness,
+not an acceptance gate. Divergences are recorded so the spike report
+can show *which* node shapes each candidate gets wrong (and how badly).
+
+Run with verbose output to see the full divergence report:
+
+    uv run pytest tests/parser_spike/test_mdast_parity.py -s -v
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+import pytest
+
+# The spike candidate libraries live in the optional `spike-parser`
+# dependency group, which CI's default `uv sync --group dev` does NOT
+# install. Skip the whole module (rather than erroring at collection)
+# when they're absent so the standard test job stays green.
+pytest.importorskip("mistune")
+pytest.importorskip("markdown_it")
+pytest.importorskip("marko")
+
+from chat_sdk.shared.markdown_parser import parse_markdown as baseline_parse  # noqa: E402
+from chat_sdk.shared.parser_spike.markdown_it_translator import (  # noqa: E402
+    parse_markdown as markdown_it_parse,
+)
+from chat_sdk.shared.parser_spike.marko_translator import (  # noqa: E402
+    parse_markdown as marko_parse,
+)
+from chat_sdk.shared.parser_spike.mistune_translator import (  # noqa: E402
+    parse_markdown as mistune_parse,
+)
+
+CANDIDATES = [  # (display name, parse function) pairs; they parametrize the tests and drive the reports
+    ("mistune", mistune_parse),
+    ("markdown-it-py", markdown_it_parse),
+    ("marko", marko_parse),
+]
+
+
+# ---------------------------------------------------------------------------
+# Divergence reporter
+# ---------------------------------------------------------------------------
+
+
+def _walk(node: Any, path: str = "$") -> list[tuple[str, str, Any]]:
+    """Return (path, kind, value) signals for *node* and its descendants.
+
+    Kind is "type", one of the tracked keys ("depth", "ordered", "start",
+    "lang", "url", "alt", "title", "value", "align") or "child_count".
+    Non-dict nodes contribute no signals.
+    """
+    if not isinstance(node, dict):
+        return []
+    tracked = ("depth", "ordered", "start", "lang", "url", "alt", "title", "value", "align")
+    signals: list[tuple[str, str, Any]] = [(path, "type", node.get("type"))]
+    signals += [(path, key, node[key]) for key in tracked if key in node]
+    kids = node.get("children")
+    if isinstance(kids, list):
+        signals.append((path, "child_count", len(kids)))
+        for index, kid in enumerate(kids):
+            # Child paths look like "$.children[0].children[2]".
+            signals += _walk(kid, f"{path}.children[{index}]")
+    return signals
+
+
+def _diff_trees(baseline: Any, candidate: Any) -> list[str]:
+    """Describe every (path, kind) signal whose value differs between the two trees.
+
+    A signal present on only one side is reported with ``'<missing>'`` for the
+    other side. Output lines are ordered by (path, kind).
+    """
+    base_index = {(p, k): v for p, k, v in _walk(baseline)}
+    cand_index = {(p, k): v for p, k, v in _walk(candidate)}
+    lines: list[str] = []
+    # Walk the union of keys so that one-sided signals are reported too.
+    for path, kind in sorted(base_index.keys() | cand_index.keys()):
+        b = base_index.get((path, kind), "<missing>")
+        c = cand_index.get((path, kind), "<missing>")
+        if b != c:
+            lines.append(f"  {path} [{kind}]: baseline={b!r} candidate={c!r}")
+    return lines
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize("name,parser", CANDIDATES)
+def test_candidate_produces_root_node(name: str, parser, mixed_content_markdown: str) -> None:
+    result = parser(mixed_content_markdown)  # smoke check: the translator runs end to end on the fixture
+    assert result["type"] == "root", f"{name} did not produce a root node"
+    assert isinstance(result.get("children"), list)  # the root must expose a list of children
+
+
+@pytest.mark.parametrize("name,parser", CANDIDATES)
+def test_candidate_matches_top_level_block_types(name: str, parser, mixed_content_markdown: str) -> None:
+    """The structural top-level blocks appear in the same order as in the baseline.
+
+    Other node types (e.g. paragraphs) are ignored: parsers may split them
+    differently around HRs or trailing blank lines.
+    """
+    important = {"heading", "table", "code", "list", "blockquote", "thematicBreak"}
+    baseline_types = [c.get("type") for c in baseline_parse(mixed_content_markdown)["children"]]
+    candidate_types = [c.get("type") for c in parser(mixed_content_markdown)["children"]]
+    expected = [t for t in baseline_types if t in important]
+    actual = [t for t in candidate_types if t in important]
+    assert expected == actual, (
+        f"{name} block-type sequence diverges:\n  baseline: {baseline_types}\n  {name}: {candidate_types}"
+    )
+
+
+def test_report_full_divergences(mixed_content_markdown: str) -> None:
+    """Print each candidate's divergences from the baseline tree; asserts nothing.
+
+    At most 30 lines are shown per candidate. Use ``pytest -s`` to see the output.
+    """
+    shown_max = 30  # cap noise
+    rule = "=" * 70
+    baseline = baseline_parse(mixed_content_markdown)
+    print("\n" + rule)
+    print("mdast divergence report")
+    print(rule)
+    for name, parser in CANDIDATES:
+        diffs = _diff_trees(baseline, parser(mixed_content_markdown))
+        print(f"\n[{name}] {len(diffs)} divergence(s)")
+        for line in diffs[:shown_max]:
+            print(line)
+        if len(diffs) > shown_max:
+            print(f"  ... +{len(diffs) - shown_max} more")
+
+
+def test_dump_baseline_tree_size(mixed_content_markdown: str) -> None:
+    """Guard the fixture itself: it must yield enough signals and node variety."""
+    signals = _walk(baseline_parse(mixed_content_markdown))
+    # The threshold keeps the fixture from shrinking to a tree too small to
+    # make the parity comparison meaningful.
+    assert len(signals) > 150, f"Fixture is too small to be a useful bake-off (only {len(signals)} signals)"
+    # Every construct the fixture is meant to exercise must appear as a
+    # node type somewhere in the baseline tree.
+    seen_types = {value for _, kind, value in signals if kind == "type"}
+    for required in (
+        "heading",
+        "paragraph",
+        "code",
+        "list",
+        "table",
+        "blockquote",
+        "thematicBreak",
+        "strong",
+        "emphasis",
+        "link",
+    ):
+        assert required in seen_types, f"Fixture missing required node type: {required}"
+
+
+# ---------------------------------------------------------------------------
+# Completeness gap (what each parser actually recognises on hard constructs)
+# ---------------------------------------------------------------------------
+
+
+def _collect_recognised_types(node: Any) -> set[str]:
+    """Collect every string ``type`` found on *node* or any dict beneath it."""
+    if not isinstance(node, dict):
+        return set()
+    kind = node.get("type")
+    found: set[str] = {kind} if isinstance(kind, str) else set()
+    # A missing or None ``children`` entry is treated as "no children".
+    for child in node.get("children") or []:
+        found.update(_collect_recognised_types(child))
+    return found
+
+
+# Construct -> mdast `type` values that count as "recognised" on the gap fixture.
+# A parser emitting none of them has silently flattened the construct to
+# paragraph/text; the baseline is documented as handling none, so it is the floor.
+# NOTE(review): the report checks type presence only, so any list item satisfies `listItem`.
+GAP_CONSTRUCTS: dict[str, set[str]] = {
+    "setext heading": {"heading"},  # heading must appear from a setext source
+    "indented code block": {"code"},  # raw 4-space indented block
+    "footnote definition": {"footnoteDefinition", "footnoteReference"},
+    "inline HTML": {"html", "inlineHTML"},
+    "task list item": {"listItem"},  # mdast: listItem with `checked` attr
+    "definition list": {"definition", "descriptionList", "termTitle"},
+}
+
+
+def test_report_completeness_gap(gap_cases_markdown: str) -> None:
+    """Print a construct-by-parser matrix of what each parser recognised.
+
+    Every parser (the baseline plus each candidate) parses the gap fixture; a
+    construct is "recognised" when any of its expected mdast types appears in
+    the resulting tree and a "silent drop" otherwise. Nothing is asserted.
+
+    Run with ``pytest -s`` to see the report inline.
+    """
+    rule = "=" * 70
+    print("\n" + rule)
+    print("Completeness gap report (gap_cases.md)")
+    print(rule)
+
+    parsers = [("baseline (hand)", baseline_parse), *CANDIDATES]
+    rows: list[tuple[str, set[str]]] = [
+        (name, _collect_recognised_types(parser(gap_cases_markdown))) for name, parser in parsers
+    ]
+
+    # Matrix layout: a 24-character label column, then one 16-character column
+    # per parser. Names are cut at 15 characters (the full field width) so that
+    # "baseline (hand)" keeps its closing parenthesis.
+    header = "".join(f" {name[:15]:>15}" for name, _ in rows)
+    print(f"\n{'construct':<24}{header}")
+    print("-" * (24 + 16 * len(rows)))
+
+    for construct, expected in GAP_CONSTRUCTS.items():
+        line = f"{construct:<24}"
+        for _, types in rows:
+            verdict = "recognised" if expected & types else "silent drop"
+            line += f" {verdict:>15}"
+        print(line)
+
+    print()
+    # Per-parser count and sorted listing of the distinct node types emitted.
+    print("Distinct mdast types emitted on gap fixture:")
+    for name, types in rows:
+        type_list = sorted(types)
+        print(f"  {name:<20} {len(type_list):>2}  -> {type_list}")