From 8c0c79f0556caf6ce76ba9b902c550fd929a06dd Mon Sep 17 00:00:00 2001
From: Leynos <leynos@troubledskies.net>
Date: Wed, 30 Jul 2025 02:00:52 +0100
Subject: [PATCH 1/5] Fix instructions typo and simplify wrap prefixes

---
 AGENTS.md   |  9 +++--
 src/wrap.rs | 94 +++++++++++++++++++----------------------------------
 2 files changed, 38 insertions(+), 65 deletions(-)
diff --git a/AGENTS.md b/AGENTS.md
index 06f65d12..fb33d43f 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -28,11 +28,10 @@
   examples demonstrating the usage and outcome of the function. Test
   documentation should omit examples where the example serves only to reiterate
   the test logic.
-- **Keep file size managable.** No single code file may be longer than 400
-  lines.
-  Long switch statements or dispatch tables should be broken up by feature and
-  constituents colocated with targets. Large blocks of test data should be
-  moved to external data files.
+- **Keep file size manageable.** No single code file may be longer than 400
+  lines. Long switch statements or dispatch tables should be broken up by
+  feature and constituents colocated with targets. Large blocks of test data
+  should be moved to external data files.
 
 ## Documentation Maintenance
 
diff --git a/src/wrap.rs b/src/wrap.rs
index cfd1431b..a656f5cb 100644
--- a/src/wrap.rs
+++ b/src/wrap.rs
@@ -4,55 +4,25 @@
 //! `docs/architecture.md` and uses the `unicode-width` crate for accurate
 //! display calculations.
 
-use regex::{Captures, Regex};
+use regex::Regex;
 
 static FENCE_RE: std::sync::LazyLock<Regex> =
-    std::sync::LazyLock::new(|| Regex::new(r"^\s*(```|~~~).*").unwrap());
+    lazy_regex!(r"^\s*(```|~~~).*", "fence pattern regex should compile",);
 
-static BULLET_RE: std::sync::LazyLock<Regex> =
-    std::sync::LazyLock::new(|| Regex::new(r"^(\s*(?:[-*+]|\d+[.)])\s+)(.*)").unwrap());
+static BULLET_RE: std::sync::LazyLock<Regex> = lazy_regex!(
+    r"^(\s*(?:[-*+]|\d+[.)])\s+)(.*)",
+    "bullet pattern regex should compile",
+);
 
-static FOOTNOTE_RE: std::sync::LazyLock<Regex> =
-    std::sync::LazyLock::new(|| Regex::new(r"^(\s*)(\[\^[^]]+\]:\s*)(.*)$").unwrap());
+static FOOTNOTE_RE: std::sync::LazyLock<Regex> = lazy_regex!(
+    r"^(\s*)(\[\^[^]]+\]:\s*)(.*)$",
+    "footnote pattern regex should compile",
+);
 
-static BLOCKQUOTE_RE: std::sync::LazyLock<Regex> =
-    std::sync::LazyLock::new(|| Regex::new(r"^(\s*(?:>\s*)+)(.*)$").unwrap());
-
-struct PrefixHandler {
-    re: &'static std::sync::LazyLock<Regex>,
-    is_bq: bool,
-    build_prefix: fn(&Captures) -> String,
-    rest_group: usize,
-}
-
-impl PrefixHandler {
-    fn build_bullet_prefix(cap: &Captures) -> String { cap[1].to_string() }
-
-    fn build_footnote_prefix(cap: &Captures) -> String { format!("{}{}", &cap[1], &cap[2]) }
-
-    fn build_blockquote_prefix(cap: &Captures) -> String { cap[1].to_string() }
-}
-
-static HANDLERS: &[PrefixHandler] = &[
-    PrefixHandler {
-        re: &BULLET_RE,
-        is_bq: false,
-        build_prefix: PrefixHandler::build_bullet_prefix,
-        rest_group: 2,
-    },
-    PrefixHandler {
-        re: &FOOTNOTE_RE,
-        is_bq: false,
-        build_prefix: PrefixHandler::build_footnote_prefix,
-        rest_group: 3,
-    },
-    PrefixHandler {
-        re: &BLOCKQUOTE_RE,
-        is_bq: true,
-        build_prefix: PrefixHandler::build_blockquote_prefix,
-        rest_group: 2,
-    },
-];
+static BLOCKQUOTE_RE: std::sync::LazyLock<Regex> = lazy_regex!(
+    r"^(\s*(?:>\s*)+)(.*)$",
+    "blockquote pattern regex should compile",
+);
 
 /// Markdown token emitted by [`tokenize_markdown`].
 #[derive(Debug, PartialEq)]
@@ -390,7 +360,7 @@ pub fn wrap_text(lines: &[String], width: usize) -> Vec<String> {
     let mut indent = String::new();
     let mut in_code = false;
 
-    'line_loop: for line in lines {
+    for line in lines {
         if FENCE_RE.is_match(line) {
             flush_paragraph(&mut out, &buf, &indent, width);
             buf.clear();
@@ -429,21 +399,25 @@ pub fn wrap_text(lines: &[String], width: usize) -> Vec<String> {
             continue;
         }
 
-        for handler in HANDLERS {
-            if let Some(cap) = handler.re.captures(line) {
-                let prefix = (handler.build_prefix)(&cap);
-                let rest = cap.get(handler.rest_group).unwrap().as_str();
-                handle_prefix_line(
-                    &mut out,
-                    &mut buf,
-                    &mut indent,
-                    width,
-                    &prefix,
-                    rest,
-                    handler.is_bq,
-                );
-                continue 'line_loop;
-            }
+        if let Some(cap) = BULLET_RE.captures(line) {
+            let prefix = cap.get(1).unwrap().as_str();
+            let rest = cap.get(2).unwrap().as_str();
+            handle_prefix_line(&mut out, &mut buf, &mut indent, width, prefix, rest, false);
+            continue;
+        }
+
+        if let Some(cap) = FOOTNOTE_RE.captures(line) {
+            let prefix = format!("{}{}", &cap[1], &cap[2]);
+            let rest = cap.get(3).unwrap().as_str();
+            handle_prefix_line(&mut out, &mut buf, &mut indent, width, &prefix, rest, false);
+            continue;
+        }
+
+        if let Some(cap) = BLOCKQUOTE_RE.captures(line) {
+            let prefix = cap.get(1).unwrap().as_str();
+            let rest = cap.get(2).unwrap().as_str();
+            handle_prefix_line(&mut out, &mut buf, &mut indent, width, prefix, rest, true);
+            continue;
         }
 
         if buf.is_empty() {

From 1fddf48a045d88446613c7a65ee3da1a51ee529d Mon Sep 17 00:00:00 2001
From: Leynos <leynos@troubledskies.net>
Date: Wed, 30 Jul 2025 02:23:42 +0100
Subject: [PATCH 2/5] Refactor tokenization and prefix handling

---
 src/lib.rs         |   1 +
 src/tokenize.rs    | 155 +++++++++++++++++++++++
 src/wrap.rs        | 304 ++-------------------------------------------
 tests/wrap_unit.rs | 106 ++++++++++++++++
 4 files changed, 275 insertions(+), 291 deletions(-)
 create mode 100644 src/tokenize.rs
 create mode 100644 tests/wrap_unit.rs

diff --git a/src/lib.rs b/src/lib.rs
index 9ae0f228..138c95b6 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -29,6 +29,7 @@ pub mod lists;
 pub mod process;
 mod reflow;
 pub mod table;
+mod tokenize;
 pub mod wrap;
 
 #[doc(hidden)]
diff --git a/src/tokenize.rs b/src/tokenize.rs
new file mode 100644
index 00000000..05c28c5c
--- /dev/null
+++ b/src/tokenize.rs
@@ -0,0 +1,155 @@
+//! Tokenization utilities for Markdown wrapping.
+//!
+//! Provides `Token` and helpers to parse inline code, links, and fences.
+
+use std::sync::LazyLock;
+
+use regex::Regex;
+
+static FENCE_RE: LazyLock<Regex> =
+    lazy_regex!(r"^\s*(```|~~~).*", "fence pattern regex should compile");
+
+/// Markdown token emitted by [`tokenize_markdown`].
+#[derive(Debug, PartialEq)]
+pub enum Token<'a> {
+    /// Line within a fenced code block, including the fence itself.
+    Fence(&'a str),
+    /// Inline code span without surrounding backticks.
+    Code(&'a str),
+    /// Plain text outside code regions.
+    Text(&'a str),
+    /// Line break separating tokens.
+    Newline,
+}
+
+fn parse_link_or_image(chars: &[char], mut i: usize) -> (String, usize) {
+    let start = i;
+    if chars[i] == '!' {
+        i += 1;
+    }
+    // skip initial '[' which we know is present
+    i += 1;
+    while i < chars.len() && chars[i] != ']' {
+        i += 1;
+    }
+    if i < chars.len() && chars[i] == ']' {
+        i += 1;
+        if i < chars.len() && chars[i] == '(' {
+            i += 1;
+            let mut depth = 1;
+            while i < chars.len() && depth > 0 {
+                match chars[i] {
+                    '(' => depth += 1,
+                    ')' => depth -= 1,
+                    _ => {}
+                }
+                i += 1;
+            }
+            let tok: String = chars[start..i].iter().collect();
+            return (tok, i);
+        }
+    }
+    let tok: String = chars[start..=start].iter().collect();
+    (tok, start + 1)
+}
+
+#[must_use]
+pub fn tokenize_inline(text: &str) -> Vec<String> {
+    let mut tokens = Vec::new();
+    let chars: Vec<char> = text.chars().collect();
+    let mut i = 0;
+    while i < chars.len() {
+        let c = chars[i];
+        if c.is_whitespace() {
+            let start = i;
+            while i < chars.len() && chars[i].is_whitespace() {
+                i += 1;
+            }
+            tokens.push(chars[start..i].iter().collect());
+        } else if c == '`' {
+            let start = i;
+            let mut delim_len = 0;
+            while i < chars.len() && chars[i] == '`' {
+                i += 1;
+                delim_len += 1;
+            }
+            let mut end = i;
+            while end < chars.len() {
+                if chars[end] == '`' {
+                    let mut j = end;
+                    let mut count = 0;
+                    while j < chars.len() && chars[j] == '`' {
+                        j += 1;
+                        count += 1;
+                    }
+                    if count == delim_len {
+                        end = j;
+                        break;
+                    }
+                }
+                end += 1;
+            }
+            if end >= chars.len() {
+                tokens.push(chars[start..start + delim_len].iter().collect());
+                i = start + delim_len;
+            } else {
+                tokens.push(chars[start..end].iter().collect());
+                i = end;
+            }
+        } else if c == '[' || (c == '!' && i + 1 < chars.len() && chars[i + 1] == '[') {
+            let (tok, new_i) = parse_link_or_image(&chars, i);
+            tokens.push(tok);
+            i = new_i;
+        } else {
+            let start = i;
+            while i < chars.len() && !chars[i].is_whitespace() && chars[i] != '`' {
+                i += 1;
+            }
+            tokens.push(chars[start..i].iter().collect());
+        }
+    }
+    tokens
+}
+
+/// Split the input string into [`Token`]s by analysing whitespace and backtick delimiters.
+pub fn tokenize_markdown(input: &str) -> Vec<Token<'_>> {
+    let mut out = Vec::new();
+    let mut in_fence = false;
+    for line in input.split_inclusive('\n') {
+        let trimmed = line.trim_end_matches('\n');
+        if FENCE_RE.is_match(trimmed) {
+            out.push(Token::Fence(trimmed));
+            out.push(Token::Newline);
+            in_fence = !in_fence;
+            continue;
+        }
+        if in_fence {
+            out.push(Token::Fence(trimmed));
+            out.push(Token::Newline);
+            continue;
+        }
+        let mut rest = trimmed;
+        while let Some(pos) = rest.find('`') {
+            if pos > 0 {
+                out.push(Token::Text(&rest[..pos]));
+            }
+            if let Some(end) = rest[pos + 1..].find('`') {
+                out.push(Token::Code(&rest[pos + 1..pos + 1 + end]));
+                rest = &rest[pos + end + 2..];
+            } else {
+                out.push(Token::Text(&rest[pos..]));
+                rest = "";
+                break;
+            }
+        }
+        if !rest.is_empty() {
+            out.push(Token::Text(rest));
+        }
+        out.push(Token::Newline);
+    }
+    out.pop();
+    out
+}
+
+#[doc(hidden)]
+pub fn is_fence(line: &str) -> bool { FENCE_RE.is_match(line) }
diff --git a/src/wrap.rs b/src/wrap.rs
index a656f5cb..2be560c8 100644
--- a/src/wrap.rs
+++ b/src/wrap.rs
@@ -6,8 +6,7 @@
 
 use regex::Regex;
 
-static FENCE_RE: std::sync::LazyLock<Regex> =
-    lazy_regex!(r"^\s*(```|~~~).*", "fence pattern regex should compile",);
+pub use crate::tokenize::{Token, is_fence, tokenize_inline, tokenize_markdown};
 
 static BULLET_RE: std::sync::LazyLock<Regex> = lazy_regex!(
     r"^(\s*(?:[-*+]|\d+[.)])\s+)(.*)",
@@ -24,176 +23,10 @@ static BLOCKQUOTE_RE: std::sync::LazyLock<Regex> = lazy_regex!(
     "blockquote pattern regex should compile",
 );
 
-/// Markdown token emitted by [`tokenize_markdown`].
-#[derive(Debug, PartialEq)]
-pub enum Token<'a> {
-    /// Line within a fenced code block, including the fence itself.
-    Fence(&'a str),
-    /// Inline code span without surrounding backticks.
-    Code(&'a str),
-    /// Plain text outside code regions.
-    Text(&'a str),
-    /// Line break separating tokens.
-    Newline,
-}
-
-fn parse_link_or_image(chars: &[char], mut i: usize) -> (String, usize) {
-    let start = i;
-    if chars[i] == '!' {
-        i += 1;
-    }
-    // skip initial '[' which we know is present
-    i += 1;
-    while i < chars.len() && chars[i] != ']' {
-        i += 1;
-    }
-    if i < chars.len() && chars[i] == ']' {
-        i += 1;
-        if i < chars.len() && chars[i] == '(' {
-            i += 1;
-            let mut depth = 1;
-            while i < chars.len() && depth > 0 {
-                match chars[i] {
-                    '(' => depth += 1,
-                    ')' => depth -= 1,
-                    _ => {}
-                }
-                i += 1;
-            }
-            let tok: String = chars[start..i].iter().collect();
-            return (tok, i);
-        }
-    }
-    let tok: String = chars[start..=start].iter().collect();
-    (tok, start + 1)
-}
-
-fn tokenize_inline(text: &str) -> Vec<String> {
-    let mut tokens = Vec::new();
-    let chars: Vec<char> = text.chars().collect();
-    let mut i = 0;
-    while i < chars.len() {
-        let c = chars[i];
-        if c.is_whitespace() {
-            let start = i;
-            while i < chars.len() && chars[i].is_whitespace() {
-                i += 1;
-            }
-            tokens.push(chars[start..i].iter().collect());
-        } else if c == '`' {
-            let start = i;
-            let mut delim_len = 0;
-            while i < chars.len() && chars[i] == '`' {
-                i += 1;
-                delim_len += 1;
-            }
-            let mut end = i;
-            while end < chars.len() {
-                if chars[end] == '`' {
-                    let mut j = end;
-                    let mut count = 0;
-                    while j < chars.len() && chars[j] == '`' {
-                        j += 1;
-                        count += 1;
-                    }
-                    if count == delim_len {
-                        end = j;
-                        break;
-                    }
-                }
-                end += 1;
-            }
-            if end >= chars.len() {
-                tokens.push(chars[start..start + delim_len].iter().collect());
-                i = start + delim_len;
-            } else {
-                tokens.push(chars[start..end].iter().collect());
-                i = end;
-            }
-        } else if c == '[' || (c == '!' && i + 1 < chars.len() && chars[i + 1] == '[') {
-            let (tok, new_i) = parse_link_or_image(&chars, i);
-            tokens.push(tok);
-            i = new_i;
-        } else {
-            let start = i;
-            while i < chars.len() && !chars[i].is_whitespace() && chars[i] != '`' {
-                i += 1;
-            }
-            tokens.push(chars[start..i].iter().collect());
-        }
-    }
-    tokens
-}
-
-/// Split the input string into [`Token`]s by analysing whitespace and
-/// backtick delimiters.
-///
-/// The tokenizer groups consecutive whitespace into a single
-/// [`Token::Text`] and recognises backtick sequences as inline code spans.
-/// When a run of backticks is encountered the parser searches forward for an
-/// identical delimiter, allowing nested backticks when the span uses a longer
-/// fence. Unmatched delimiter sequences are treated as literal text.
-///
-/// ```rust,ignore
-/// use mdtablefix::wrap::{Token, tokenize_markdown};
-///
-/// let tokens = tokenize_markdown("Example with `code`");
-/// assert_eq!(
-///     tokens,
-///     vec![Token::Text("Example with "), Token::Code("code")]
-/// );
-/// ```
-pub(crate) fn tokenize_markdown(input: &str) -> Vec<Token<'_>> {
-    let mut out = Vec::new();
-    let mut in_fence = false;
-    for line in input.split_inclusive('\n') {
-        let trimmed = line.trim_end_matches('\n');
-        if FENCE_RE.is_match(trimmed) {
-            out.push(Token::Fence(trimmed));
-            out.push(Token::Newline);
-            in_fence = !in_fence;
-            continue;
-        }
-        if in_fence {
-            out.push(Token::Fence(trimmed));
-            out.push(Token::Newline);
-            continue;
-        }
-        let mut rest = trimmed;
-        while let Some(pos) = rest.find('`') {
-            if pos > 0 {
-                out.push(Token::Text(&rest[..pos]));
-            }
-            if let Some(end) = rest[pos + 1..].find('`') {
-                out.push(Token::Code(&rest[pos + 1..pos + 1 + end]));
-                rest = &rest[pos + end + 2..];
-            } else {
-                out.push(Token::Text(&rest[pos..]));
-                rest = "";
-                break;
-            }
-        }
-        if !rest.is_empty() {
-            out.push(Token::Text(rest));
-        }
-        out.push(Token::Newline);
-    }
-    out.pop();
-    out
-}
-
 /// Determine if the current line should break at the last whitespace.
 ///
 /// Returns `true` if `current_width` exceeds `width` and a whitespace split
 /// position is available.
-///
-/// # Examples
-///
-/// ```ignore
-/// use mdtablefix::wrap::should_break_line;
-/// assert!(should_break_line(10, 12, Some(3)));
-/// assert!(!should_break_line(10, 8, Some(3)));
-/// ```
 fn should_break_line(width: usize, current_width: usize, last_split: Option<usize>) -> bool {
     current_width > width && last_split.is_some()
 }
@@ -273,9 +106,6 @@ fn wrap_preserving_code(text: &str, width: usize) -> Vec<String> {
     lines
 }
 
-#[doc(hidden)]
-pub fn is_fence(line: &str) -> bool { FENCE_RE.is_match(line) }
-
 fn flush_paragraph(out: &mut Vec<String>, buf: &[(String, bool)], indent: &str, width: usize) {
     if buf.is_empty() {
         return;
@@ -361,7 +191,7 @@ pub fn wrap_text(lines: &[String], width: usize) -> Vec<String> {
     let mut in_code = false;
 
     for line in lines {
-        if FENCE_RE.is_match(line) {
+        if is_fence(line) {
             flush_paragraph(&mut out, &buf, &indent, width);
             buf.clear();
             indent.clear();
@@ -400,22 +230,28 @@ pub fn wrap_text(lines: &[String], width: usize) -> Vec<String> {
         }
 
         if let Some(cap) = BULLET_RE.captures(line) {
-            let prefix = cap.get(1).unwrap().as_str();
-            let rest = cap.get(2).unwrap().as_str();
+            let prefix = cap.get(1).expect("bullet regex capture").as_str();
+            let rest = cap.get(2).expect("bullet regex remainder capture").as_str();
             handle_prefix_line(&mut out, &mut buf, &mut indent, width, prefix, rest, false);
             continue;
         }
 
         if let Some(cap) = FOOTNOTE_RE.captures(line) {
             let prefix = format!("{}{}", &cap[1], &cap[2]);
-            let rest = cap.get(3).unwrap().as_str();
+            let rest = cap
+                .get(3)
+                .expect("footnote regex remainder capture")
+                .as_str();
             handle_prefix_line(&mut out, &mut buf, &mut indent, width, &prefix, rest, false);
             continue;
         }
 
         if let Some(cap) = BLOCKQUOTE_RE.captures(line) {
-            let prefix = cap.get(1).unwrap().as_str();
-            let rest = cap.get(2).unwrap().as_str();
+            let prefix = cap.get(1).expect("blockquote prefix capture").as_str();
+            let rest = cap
+                .get(2)
+                .expect("blockquote regex remainder capture")
+                .as_str();
             handle_prefix_line(&mut out, &mut buf, &mut indent, width, prefix, rest, true);
             continue;
         }
@@ -452,117 +288,3 @@ pub fn wrap_text(lines: &[String], width: usize) -> Vec<String> {
     flush_paragraph(&mut out, &buf, &indent, width);
     out
 }
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn wrap_text_preserves_hyphenated_words() {
-        let input = vec!["A word that is very-long-word indeed".to_string()];
-        let wrapped = wrap_text(&input, 20);
-        assert_eq!(
-            wrapped,
-            vec![
-                "A word that is".to_string(),
-                "very-long-word".to_string(),
-                "indeed".to_string(),
-            ]
-        );
-    }
-
-    #[test]
-    fn wrap_text_does_not_insert_spaces_in_hyphenated_words() {
-        let input = vec![
-            concat!(
-                "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec tincidunt ",
-                "elit-sed fermentum congue. Vivamus dictum nulla sed consectetur ",
-                "volutpat."
-            )
-            .to_string(),
-        ];
-        let wrapped = wrap_text(&input, 80);
-        assert_eq!(
-            wrapped,
-            vec![
-                "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec tincidunt"
-                    .to_string(),
-                "elit-sed fermentum congue. Vivamus dictum nulla sed consectetur volutpat."
-                    .to_string(),
-            ]
-        );
-    }
-
-    #[test]
-    fn wrap_text_preserves_code_spans() {
-        let input = vec![
-            "with their own escaping rules. On Windows, scripts default to `powershell -Command` \
-             unless the manifest's `interpreter` field overrides the setting."
-                .to_string(),
-        ];
-        let wrapped = wrap_text(&input, 60);
-        assert_eq!(
-            wrapped,
-            vec![
-                "with their own escaping rules. On Windows, scripts default".to_string(),
-                "to `powershell -Command` unless the manifest's".to_string(),
-                "`interpreter` field overrides the setting.".to_string(),
-            ]
-        );
-    }
-
-    #[test]
-    fn wrap_text_multiple_code_spans() {
-        let input = vec!["combine `foo bar` and `baz qux` in one line".to_string()];
-        let wrapped = wrap_text(&input, 25);
-        assert_eq!(
-            wrapped,
-            vec![
-                "combine `foo bar` and".to_string(),
-                "`baz qux` in one line".to_string(),
-            ]
-        );
-    }
-
-    #[test]
-    fn wrap_text_nested_backticks() {
-        let input = vec!["Use `` `code` `` to quote backticks".to_string()];
-        let wrapped = wrap_text(&input, 20);
-        assert_eq!(
-            wrapped,
-            vec![
-                "Use `` `code` `` to".to_string(),
-                "quote backticks".to_string()
-            ]
-        );
-    }
-
-    #[test]
-    fn wrap_text_unmatched_backticks() {
-        let input = vec!["This has a `dangling code span.".to_string()];
-        let wrapped = wrap_text(&input, 20);
-        assert_eq!(
-            wrapped,
-            vec!["This has a".to_string(), "`dangling code span.".to_string()]
-        );
-    }
-
-    #[test]
-    fn wrap_text_preserves_links() {
-        let input = vec![
-            "`falcon-pachinko` is an extension library for the".to_string(),
-            "[Falcon](https://falcon.readthedocs.io) web framework. It adds a structured"
-                .to_string(),
-            "approach to asynchronous WebSocket routing and background worker integration."
-                .to_string(),
-        ];
-        let wrapped = wrap_text(&input, 80);
-        let joined = wrapped.join("\n");
-        assert_eq!(joined.matches("https://").count(), 1);
-        assert!(
-            wrapped
-                .iter()
-                .any(|l| l.contains("https://falcon.readthedocs.io"))
-        );
-    }
-}
diff --git a/tests/wrap_unit.rs b/tests/wrap_unit.rs
new file mode 100644
index 00000000..7f127cf9
--- /dev/null
+++ b/tests/wrap_unit.rs
@@ -0,0 +1,106 @@
+use mdtablefix::wrap::wrap_text;
+
+#[test]
+fn wrap_text_preserves_hyphenated_words() {
+    let input = vec!["A word that is very-long-word indeed".to_string()];
+    let wrapped = wrap_text(&input, 20);
+    assert_eq!(
+        wrapped,
+        vec![
+            "A word that is".to_string(),
+            "very-long-word".to_string(),
+            "indeed".to_string(),
+        ]
+    );
+}
+
+#[test]
+fn wrap_text_does_not_insert_spaces_in_hyphenated_words() {
+    let input = vec![
+        concat!(
+            "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec tincidunt ",
+            "elit-sed fermentum congue. Vivamus dictum nulla sed consectetur ",
+            "volutpat."
+        )
+        .to_string(),
+    ];
+    let wrapped = wrap_text(&input, 80);
+    assert_eq!(
+        wrapped,
+        vec![
+            "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec tincidunt".to_string(),
+            "elit-sed fermentum congue. Vivamus dictum nulla sed consectetur volutpat.".to_string(),
+        ]
+    );
+}
+
+#[test]
+fn wrap_text_preserves_code_spans() {
+    let input = vec![
+        "with their own escaping rules. On Windows, scripts default to `powershell -Command` \
+         unless the manifest's `interpreter` field overrides the setting."
+            .to_string(),
+    ];
+    let wrapped = wrap_text(&input, 60);
+    assert_eq!(
+        wrapped,
+        vec![
+            "with their own escaping rules. On Windows, scripts default".to_string(),
+            "to `powershell -Command` unless the manifest's".to_string(),
+            "`interpreter` field overrides the setting.".to_string(),
+        ]
+    );
+}
+
+#[test]
+fn wrap_text_multiple_code_spans() {
+    let input = vec!["combine `foo bar` and `baz qux` in one line".to_string()];
+    let wrapped = wrap_text(&input, 25);
+    assert_eq!(
+        wrapped,
+        vec![
+            "combine `foo bar` and".to_string(),
+            "`baz qux` in one line".to_string(),
+        ]
+    );
+}
+
+#[test]
+fn wrap_text_nested_backticks() {
+    let input = vec!["Use `` `code` `` to quote backticks".to_string()];
+    let wrapped = wrap_text(&input, 20);
+    assert_eq!(
+        wrapped,
+        vec![
+            "Use `` `code` `` to".to_string(),
+            "quote backticks".to_string()
+        ]
+    );
+}
+
+#[test]
+fn wrap_text_unmatched_backticks() {
+    let input = vec!["This has a `dangling code span.".to_string()];
+    let wrapped = wrap_text(&input, 20);
+    assert_eq!(
+        wrapped,
+        vec!["This has a".to_string(), "`dangling code span.".to_string()]
+    );
+}
+
+#[test]
+fn wrap_text_preserves_links() {
+    let input = vec![
+        "`falcon-pachinko` is an extension library for the".to_string(),
+        "[Falcon](https://falcon.readthedocs.io) web framework. It adds a structured".to_string(),
+        "approach to asynchronous WebSocket routing and background worker integration.".to_string(),
+    ];
+    let wrapped = wrap_text(&input, 80);
+    let joined = wrapped.join("\n");
+    assert_eq!(joined.matches("https://").count(), 1);
+    assert!(
+        wrapped
+            .iter()
+            .any(|l| l.contains("https://falcon.readthedocs.io"))
+    );
+}

From 28908d7eb8ec984d3f71d9b2e22a05b83c52ce37 Mon Sep 17 00:00:00 2001
From: Leynos <leynos@troubledskies.net>
Date: Sat, 2 Aug 2025 00:29:50 +0100
Subject: [PATCH 3/5] Remove unused tokenizer and tidy wrap helpers (#174)

---
 src/html.rs     |   4 +-
 src/io.rs       |   4 +-
 src/lib.rs      |   1 -
 src/tokenize.rs | 155 ------------------------------------------------
 src/wrap.rs     |  51 +---------------
 5 files changed, 9 insertions(+), 206 deletions(-)
 delete mode 100644 src/tokenize.rs

diff --git a/src/html.rs b/src/html.rs
index 2742d7b3..b148e314 100644
--- a/src/html.rs
+++ b/src/html.rs
@@ -84,7 +84,9 @@ fn is_element(handle: &Handle, tag: &str) -> bool {
 }
 
 /// Returns `true` if `handle` represents a `<td>` or `<th>` element.
-fn is_table_cell(handle: &Handle) -> bool { is_element(handle, "td") || is_element(handle, "th") }
+fn is_table_cell(handle: &Handle) -> bool {
+    is_element(handle, "td") || is_element(handle, "th")
+}
 
 /// Walks the DOM tree collecting `<table>` nodes under `handle`.
 fn collect_tables(handle: &Handle, tables: &mut Vec<Handle>) {
diff --git a/src/io.rs b/src/io.rs
index cb30bea4..e9bd9c17 100644
--- a/src/io.rs
+++ b/src/io.rs
@@ -30,7 +30,9 @@ where
 ///
 /// # Errors
 /// Returns an error if reading or writing the file fails.
-pub fn rewrite(path: &Path) -> std::io::Result<()> { rewrite_with(path, process_stream) }
+pub fn rewrite(path: &Path) -> std::io::Result<()> {
+    rewrite_with(path, process_stream)
+}
 
 /// Rewrite a file in place without wrapping text.
 ///
diff --git a/src/lib.rs b/src/lib.rs
index c7e7678d..3edae610 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -30,7 +30,6 @@ pub mod lists;
 pub mod process;
 mod reflow;
 pub mod table;
-mod tokenize;
 pub mod textproc;
 pub mod wrap;
 
diff --git a/src/tokenize.rs b/src/tokenize.rs
deleted file mode 100644
index 05c28c5c..00000000
--- a/src/tokenize.rs
+++ /dev/null
@@ -1,155 +0,0 @@
-//! Tokenization utilities for Markdown wrapping.
-//!
-//! Provides `Token` and helpers to parse inline code, links, and fences.
-
-use std::sync::LazyLock;
-
-use regex::Regex;
-
-static FENCE_RE: LazyLock<Regex> =
-    lazy_regex!(r"^\s*(```|~~~).*", "fence pattern regex should compile");
-
-/// Markdown token emitted by [`tokenize_markdown`].
-#[derive(Debug, PartialEq)]
-pub enum Token<'a> {
-    /// Line within a fenced code block, including the fence itself.
-    Fence(&'a str),
-    /// Inline code span without surrounding backticks.
-    Code(&'a str),
-    /// Plain text outside code regions.
-    Text(&'a str),
-    /// Line break separating tokens.
-    Newline,
-}
-
-fn parse_link_or_image(chars: &[char], mut i: usize) -> (String, usize) {
-    let start = i;
-    if chars[i] == '!' {
-        i += 1;
-    }
-    // skip initial '[' which we know is present
-    i += 1;
-    while i < chars.len() && chars[i] != ']' {
-        i += 1;
-    }
-    if i < chars.len() && chars[i] == ']' {
-        i += 1;
-        if i < chars.len() && chars[i] == '(' {
-            i += 1;
-            let mut depth = 1;
-            while i < chars.len() && depth > 0 {
-                match chars[i] {
-                    '(' => depth += 1,
-                    ')' => depth -= 1,
-                    _ => {}
-                }
-                i += 1;
-            }
-            let tok: String = chars[start..i].iter().collect();
-            return (tok, i);
-        }
-    }
-    let tok: String = chars[start..=start].iter().collect();
-    (tok, start + 1)
-}
-
-#[must_use]
-pub fn tokenize_inline(text: &str) -> Vec<String> {
-    let mut tokens = Vec::new();
-    let chars: Vec<char> = text.chars().collect();
-    let mut i = 0;
-    while i < chars.len() {
-        let c = chars[i];
-        if c.is_whitespace() {
-            let start = i;
-            while i < chars.len() && chars[i].is_whitespace() {
-                i += 1;
-            }
-            tokens.push(chars[start..i].iter().collect());
-        } else if c == '`' {
-            let start = i;
-            let mut delim_len = 0;
-            while i < chars.len() && chars[i] == '`' {
-                i += 1;
-                delim_len += 1;
-            }
-            let mut end = i;
-            while end < chars.len() {
-                if chars[end] == '`' {
-                    let mut j = end;
-                    let mut count = 0;
-                    while j < chars.len() && chars[j] == '`' {
-                        j += 1;
-                        count += 1;
-                    }
-                    if count == delim_len {
-                        end = j;
-                        break;
-                    }
-                }
-                end += 1;
-            }
-            if end >= chars.len() {
-                tokens.push(chars[start..start + delim_len].iter().collect());
-                i = start + delim_len;
-            } else {
-                tokens.push(chars[start..end].iter().collect());
-                i = end;
-            }
-        } else if c == '[' || (c == '!' && i + 1 < chars.len() && chars[i + 1] == '[') {
-            let (tok, new_i) = parse_link_or_image(&chars, i);
-            tokens.push(tok);
-            i = new_i;
-        } else {
-            let start = i;
-            while i < chars.len() && !chars[i].is_whitespace() && chars[i] != '`' {
-                i += 1;
-            }
-            tokens.push(chars[start..i].iter().collect());
-        }
-    }
-    tokens
-}
-
-/// Split the input string into [`Token`]s by analysing whitespace and backtick delimiters.
-pub fn tokenize_markdown(input: &str) -> Vec<Token<'_>> {
-    let mut out = Vec::new();
-    let mut in_fence = false;
-    for line in input.split_inclusive('\n') {
-        let trimmed = line.trim_end_matches('\n');
-        if FENCE_RE.is_match(trimmed) {
-            out.push(Token::Fence(trimmed));
-            out.push(Token::Newline);
-            in_fence = !in_fence;
-            continue;
-        }
-        if in_fence {
-            out.push(Token::Fence(trimmed));
-            out.push(Token::Newline);
-            continue;
-        }
-        let mut rest = trimmed;
-        while let Some(pos) = rest.find('`') {
-            if pos > 0 {
-                out.push(Token::Text(&rest[..pos]));
-            }
-            if let Some(end) = rest[pos + 1..].find('`') {
-                out.push(Token::Code(&rest[pos + 1..pos + 1 + end]));
-                rest = &rest[pos + end + 2..];
-            } else {
-                out.push(Token::Text(&rest[pos..]));
-                rest = "";
-                break;
-            }
-        }
-        if !rest.is_empty() {
-            out.push(Token::Text(rest));
-        }
-        out.push(Token::Newline);
-    }
-    out.pop();
-    out
-}
-
-#[doc(hidden)]
-pub fn is_fence(line: &str) -> bool { FENCE_RE.is_match(line) }
diff --git a/src/wrap.rs b/src/wrap.rs
index 02508ca7..eb148236 100644
--- a/src/wrap.rs
+++ b/src/wrap.rs
@@ -38,17 +38,6 @@ static BLOCKQUOTE_RE: std::sync::LazyLock<Regex> = lazy_regex!(
     "blockquote pattern regex should compile",
 );
 
-/// Determine if the current line should break at the last whitespace.
-///
-/// Returns `true` if `current_width` exceeds `width` and a whitespace split
-/// position is available.
-fn should_break_line(width: usize, current_width: usize, last_split: Option<usize>) -> bool {
-    current_width > width && last_split.is_some()
-}
-
-static BLOCKQUOTE_RE: std::sync::LazyLock<Regex> =
-    std::sync::LazyLock::new(|| Regex::new(r"^(\s*(?:>\s*)+)(.*)$").unwrap());
-
 /// Matches `markdownlint` comment directives.
 ///
 /// The regex is case-insensitive and recognises these forms with optional rule
@@ -65,42 +54,6 @@ static MARKDOWNLINT_DIRECTIVE_RE: std::sync::LazyLock<Regex> = std::sync::LazyLo
     .expect("valid markdownlint regex")
 });
 
-struct PrefixHandler {
-    re: &'static std::sync::LazyLock<Regex>,
-    is_bq: bool,
-    build_prefix: fn(&Captures) -> String,
-    rest_group: usize,
-}
-
-impl PrefixHandler {
-    fn build_bullet_prefix(cap: &Captures) -> String { cap[1].to_string() }
-
-    fn build_footnote_prefix(cap: &Captures) -> String { format!("{}{}", &cap[1], &cap[2]) }
-
-    fn build_blockquote_prefix(cap: &Captures) -> String { cap[1].to_string() }
-}
-
-static HANDLERS: &[PrefixHandler] = &[
-    PrefixHandler {
-        re: &BULLET_RE,
-        is_bq: false,
-        build_prefix: PrefixHandler::build_bullet_prefix,
-        rest_group: 2,
-    },
-    PrefixHandler {
-        re: &FOOTNOTE_RE,
-        is_bq: false,
-        build_prefix: PrefixHandler::build_footnote_prefix,
-        rest_group: 3,
-    },
-    PrefixHandler {
-        re: &BLOCKQUOTE_RE,
-        is_bq: true,
-        build_prefix: PrefixHandler::build_blockquote_prefix,
-        rest_group: 2,
-    },
-];
-
 fn wrap_preserving_code(text: &str, width: usize) -> Vec<String> {
     use unicode_width::UnicodeWidthStr;
 
@@ -210,7 +163,9 @@ fn wrap_preserving_code(text: &str, width: usize) -> Vec<String> {
 }
 
 #[doc(hidden)]
-pub fn is_fence(line: &str) -> bool { FENCE_RE.is_match(line) }
+pub fn is_fence(line: &str) -> bool {
+    FENCE_RE.is_match(line)
+}
 
 pub(crate) fn is_markdownlint_directive(line: &str) -> bool {
     MARKDOWNLINT_DIRECTIVE_RE.is_match(line)

From ce4519a0586314a760cc31501f38f135379421a4 Mon Sep 17 00:00:00 2001
From: Payton McIntosh <pmcintosh@df12.net>
Date: Sat, 2 Aug 2025 00:39:12 +0100
Subject: [PATCH 4/5] Formatting for rustfmt

---
 src/html.rs         | 4 +---
 src/io.rs           | 4 +---
 src/wrap.rs         | 4 +---
 tests/common/mod.rs | 2 +-
 4 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/src/html.rs b/src/html.rs
index b148e314..2742d7b3 100644
--- a/src/html.rs
+++ b/src/html.rs
@@ -84,9 +84,7 @@ fn is_element(handle: &Handle, tag: &str) -> bool {
 }
 
 /// Returns `true` if `handle` represents a `<td>` or `<th>` element.
-fn is_table_cell(handle: &Handle) -> bool {
-    is_element(handle, "td") || is_element(handle, "th")
-}
+fn is_table_cell(handle: &Handle) -> bool { is_element(handle, "td") || is_element(handle, "th") }
 
 /// Walks the DOM tree collecting `<table>` nodes under `handle`.
 fn collect_tables(handle: &Handle, tables: &mut Vec<Handle>) {
diff --git a/src/io.rs b/src/io.rs
index e9bd9c17..cb30bea4 100644
--- a/src/io.rs
+++ b/src/io.rs
@@ -30,9 +30,7 @@ where
 ///
 /// # Errors
 /// Returns an error if reading or writing the file fails.
-pub fn rewrite(path: &Path) -> std::io::Result<()> {
-    rewrite_with(path, process_stream)
-}
+pub fn rewrite(path: &Path) -> std::io::Result<()> { rewrite_with(path, process_stream) }
 
 /// Rewrite a file in place without wrapping text.
 ///
diff --git a/src/wrap.rs b/src/wrap.rs
index eb148236..430e4be9 100644
--- a/src/wrap.rs
+++ b/src/wrap.rs
@@ -163,9 +163,7 @@ fn wrap_preserving_code(text: &str, width: usize) -> Vec<String> {
 }
 
 #[doc(hidden)]
-pub fn is_fence(line: &str) -> bool {
-    FENCE_RE.is_match(line)
-}
+pub fn is_fence(line: &str) -> bool { FENCE_RE.is_match(line) }
 
 pub(crate) fn is_markdownlint_directive(line: &str) -> bool {
     MARKDOWNLINT_DIRECTIVE_RE.is_match(line)
diff --git a/tests/common/mod.rs b/tests/common/mod.rs
index ff4729e8..64b5fc38 100644
--- a/tests/common/mod.rs
+++ b/tests/common/mod.rs
@@ -19,7 +19,7 @@ macro_rules! lines_vec {
 ///
 /// Example:
 /// ```
-/// let input: Vec<String> = include_lines!("data/bold_header_input.txt");
+/// let input: Vec<String> = include_lines!("data/bold_header_input.txt");
 /// ```
 #[expect(unused_macros, reason = "macros are optional helpers across modules")]
 macro_rules! include_lines {

From bcad4683af8eca72c440c4b36b3c435dbb65237b Mon Sep 17 00:00:00 2001
From: Payton McIntosh <pmcintosh@df12.net>
Date: Sat, 2 Aug 2025 00:42:34 +0100
Subject: [PATCH 5/5] Collapse nested if-let per clippy

---
 src/html.rs | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/html.rs b/src/html.rs
index 2742d7b3..c3372881 100644
--- a/src/html.rs
+++ b/src/html.rs
@@ -112,10 +112,10 @@ fn is_bold_tag(tag: &str) -> bool {
 
 /// Returns `true` if `handle` contains a `<b>` or `<strong>` descendant.
 fn contains_strong(handle: &Handle) -> bool {
-    if let NodeData::Element { name, .. } = &handle.data {
-        if is_bold_tag(name.local.as_ref()) {
-            return true;
-        }
+    if let NodeData::Element { name, .. } = &handle.data
+        && is_bold_tag(name.local.as_ref())
+    {
+        return true;
     }
     let children = handle.children.borrow();
     children.iter().any(contains_strong)