diff --git a/Cargo.lock b/Cargo.lock index aaf277fd..510f7e10 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -407,8 +407,7 @@ dependencies = [ "regex", "rstest", "tempfile", - "textwrap", - "unicode-width 0.1.14", + "unicode-width", ] [[package]] @@ -725,12 +724,6 @@ version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" -[[package]] -name = "smawk" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7c388c1b5e93756d0c740965c41e8822f866621d41acbdf6336a6a168f8840c" - [[package]] name = "string_cache" version = "0.8.9" @@ -803,41 +796,18 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f50febec83f5ee1df3015341d8bd429f2d1cc62bcba7ea2076759d315084683" -[[package]] -name = "textwrap" -version = "0.16.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c13547615a44dc9c452a8a534638acdf07120d4b6847c8178705da06306a3057" -dependencies = [ - "smawk", - "unicode-linebreak", - "unicode-width 0.2.1", -] - [[package]] name = "unicode-ident" version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" -[[package]] -name = "unicode-linebreak" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b09c83c3c29d37506a3e260c08c03743a6bb66a9cd432c6934ab501a190571f" - [[package]] name = "unicode-width" version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" -[[package]] -name = "unicode-width" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c" - [[package]] name = "utf-8" version = "0.7.6" diff --git 
a/Cargo.toml b/Cargo.toml index baa976fe..7c529e07 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,7 +9,6 @@ clap = { version = "4", features = ["derive"] } regex = "1" html5ever = "0.27" markup5ever_rcdom = "0.3" -textwrap = "^0.16" unicode-width = ">=0.1, <0.2" diff --git a/src/lib.rs b/src/lib.rs index 77a3032b..5468c4ea 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -220,9 +220,6 @@ pub fn reflow_table(lines: &[String]) -> Vec<String> { static FENCE_RE: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| Regex::new(r"^(```|~~~).*").unwrap()); -static CODE_SPAN_RE: std::sync::LazyLock<Regex> = - std::sync::LazyLock::new(|| Regex::new(r"(`+[^`]*`+)").unwrap()); - static BULLET_RE: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| Regex::new(r"^(\s*(?:[-*+]|\d+[.)])\s+)(.*)").unwrap()); @@ -259,16 +256,17 @@ fn tokenize_markdown(text: &str) -> Vec<String> { } if count == delim_len { end = j; - tokens.push(chars[start..end].iter().collect()); - i = end; break; } } end += 1; } if end >= chars.len() { - tokens.push(chars[start..].iter().collect()); - break; + tokens.push(chars[start..start + delim_len].iter().collect()); + i = start + delim_len; + } else { + tokens.push(chars[start..end].iter().collect()); + i = end; } } else { let start = i; @@ -303,14 +301,19 @@ fn wrap_preserving_code(text: &str, width: usize) -> Vec<String> { if current_width + token_width <= width { current.push_str(&token); current_width += token_width; - } else { - let trimmed = current.trim_end(); - if !trimmed.is_empty() { - lines.push(trimmed.to_string()); - } - current.clear(); - current_width = token_width; + continue; + } + + let trimmed = current.trim_end(); + if !trimmed.is_empty() { + lines.push(trimmed.to_string()); + } + current.clear(); + current_width = 0; + + if !token.chars().all(char::is_whitespace) { current.push_str(&token); + current_width = token_width; } } let trimmed = current.trim_end(); @@ -338,22 +341,6 @@ pub fn is_fence(line: &str) -> bool { FENCE_RE.is_match(line) } /// Inline code spans are delimited 
by matching pairs of backticks. This helper /// replaces normal spaces inside those spans with `U+00A0` (non-breaking space) /// so that the wrapping logic does not split them across lines. -fn protect_code_span_spaces(text: &str) -> String { - CODE_SPAN_RE - .replace_all(text, |caps: &regex::Captures| { - caps[0].replace(' ', "\u{00A0}") - }) - .into_owned() -} - -fn wrap_segment(seg: &str, indent: &str, width: usize, out: &mut Vec<String>) { - let opts = Options::new(width - indent.len()).word_splitter(WordSplitter::NoHyphenation); - let protected = protect_code_span_spaces(seg); - for line in fill(&protected, &opts).lines() { - let restored = line.replace('\u{00A0}', " "); - out.push(format!("{indent}{restored}")); - } -} /// Flushes a buffered paragraph to the output, wrapping text to the specified width and applying /// indentation. ///