diff --git a/src/html.rs b/src/html.rs index 2742d7b3..c3372881 100644 --- a/src/html.rs +++ b/src/html.rs @@ -112,10 +112,10 @@ fn is_bold_tag(tag: &str) -> bool { /// Returns `true` if `handle` contains a `` or `` descendant. fn contains_strong(handle: &Handle) -> bool { - if let NodeData::Element { name, .. } = &handle.data { - if is_bold_tag(name.local.as_ref()) { - return true; - } + if let NodeData::Element { name, .. } = &handle.data + && is_bold_tag(name.local.as_ref()) + { + return true; } let children = handle.children.borrow(); children.iter().any(contains_strong) diff --git a/src/wrap.rs b/src/wrap.rs index 91b9e0d3..430e4be9 100644 --- a/src/wrap.rs +++ b/src/wrap.rs @@ -8,7 +8,7 @@ //! The [`Token`] enum and [`tokenize_markdown`] function are public so callers //! can perform custom token-based processing. -use regex::{Captures, Regex}; +use regex::Regex; mod tokenize; /// Token emitted by [`tokenize::segment_inline`] and used by higher-level wrappers. @@ -23,14 +23,20 @@ pub use tokenize::tokenize_markdown; static FENCE_RE: std::sync::LazyLock = std::sync::LazyLock::new(|| Regex::new(r"^\s*(```|~~~).*").unwrap()); -static BULLET_RE: std::sync::LazyLock = - std::sync::LazyLock::new(|| Regex::new(r"^(\s*(?:[-*+]|\d+[.)])\s+)(.*)").unwrap()); +static BULLET_RE: std::sync::LazyLock = lazy_regex!( + r"^(\s*(?:[-*+]|\d+[.)])\s+)(.*)", + "bullet pattern regex should compile", +); -static FOOTNOTE_RE: std::sync::LazyLock = - std::sync::LazyLock::new(|| Regex::new(r"^(\s*)(\[\^[^]]+\]:\s*)(.*)$").unwrap()); +static FOOTNOTE_RE: std::sync::LazyLock = lazy_regex!( + r"^(\s*)(\[\^[^]]+\]:\s*)(.*)$", + "footnote pattern regex should compile", +); -static BLOCKQUOTE_RE: std::sync::LazyLock = - std::sync::LazyLock::new(|| Regex::new(r"^(\s*(?:>\s*)+)(.*)$").unwrap()); +static BLOCKQUOTE_RE: std::sync::LazyLock = lazy_regex!( + r"^(\s*(?:>\s*)+)(.*)$", + "blockquote pattern regex should compile", +); /// Matches `markdownlint` comment directives. /// @@ -48,42 +54,6 @@ static MARKDOWNLINT_DIRECTIVE_RE: std::sync::LazyLock = std::sync::LazyLo .expect("valid markdownlint regex") }); -struct PrefixHandler { - re: &'static std::sync::LazyLock, - is_bq: bool, - build_prefix: fn(&Captures) -> String, - rest_group: usize, -} - -impl PrefixHandler { - fn build_bullet_prefix(cap: &Captures) -> String { cap[1].to_string() } - - fn build_footnote_prefix(cap: &Captures) -> String { format!("{}{}", &cap[1], &cap[2]) } - - fn build_blockquote_prefix(cap: &Captures) -> String { cap[1].to_string() } -} - -static HANDLERS: &[PrefixHandler] = &[ - PrefixHandler { - re: &BULLET_RE, - is_bq: false, - build_prefix: PrefixHandler::build_bullet_prefix, - rest_group: 2, - }, - PrefixHandler { - re: &FOOTNOTE_RE, - is_bq: false, - build_prefix: PrefixHandler::build_footnote_prefix, - rest_group: 3, - }, - PrefixHandler { - re: &BLOCKQUOTE_RE, - is_bq: true, - build_prefix: PrefixHandler::build_blockquote_prefix, - rest_group: 2, - }, -]; - fn wrap_preserving_code(text: &str, width: usize) -> Vec { use unicode_width::UnicodeWidthStr; @@ -283,8 +253,8 @@ pub fn wrap_text(lines: &[String], width: usize) -> Vec { let mut indent = String::new(); let mut in_code = false; - 'line_loop: for line in lines { - if FENCE_RE.is_match(line) { + for line in lines { + if is_fence(line) { flush_paragraph(&mut out, &buf, &indent, width); buf.clear(); indent.clear(); @@ -330,21 +300,31 @@ pub fn wrap_text(lines: &[String], width: usize) -> Vec { continue; } - for handler in HANDLERS { - if let Some(cap) = handler.re.captures(line) { - let prefix = (handler.build_prefix)(&cap); - let rest = cap.get(handler.rest_group).unwrap().as_str(); - handle_prefix_line( - &mut out, - &mut buf, - &mut indent, - width, - &prefix, - rest, - handler.is_bq, - ); - continue 'line_loop; - } + if let Some(cap) = BULLET_RE.captures(line) { + let prefix = cap.get(1).expect("bullet regex capture").as_str(); + let rest = cap.get(2).expect("bullet regex remainder capture").as_str(); + handle_prefix_line(&mut out, &mut buf, &mut indent, width, prefix, rest, false); + continue; + } + + if let Some(cap) = FOOTNOTE_RE.captures(line) { + let prefix = format!("{}{}", &cap[1], &cap[2]); + let rest = cap + .get(3) + .expect("footnote regex remainder capture") + .as_str(); + handle_prefix_line(&mut out, &mut buf, &mut indent, width, &prefix, rest, false); + continue; + } + + if let Some(cap) = BLOCKQUOTE_RE.captures(line) { + let prefix = cap.get(1).expect("blockquote prefix capture").as_str(); + let rest = cap + .get(2) + .expect("blockquote regex remainder capture") + .as_str(); + handle_prefix_line(&mut out, &mut buf, &mut indent, width, prefix, rest, true); + continue; } if buf.is_empty() { diff --git a/tests/common/mod.rs b/tests/common/mod.rs index ff4729e8..64b5fc38 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -19,7 +19,7 @@ macro_rules! lines_vec { /// /// Example: /// ``` -/// let input: Vec = include_lines!("data/bold_header_input.txt"); +/// let input: Vec = include_lines!("data/bold_header_input.txt"); /// ``` #[expect(unused_macros, reason = "macros are optional helpers across modules")] macro_rules! include_lines { diff --git a/tests/wrap_unit.rs b/tests/wrap_unit.rs new file mode 100644 index 00000000..7f127cf9 --- /dev/null +++ b/tests/wrap_unit.rs @@ -0,0 +1,106 @@ +use mdtablefix::wrap::wrap_text; + +#[test] +fn wrap_text_preserves_hyphenated_words() { + let input = vec!["A word that is very-long-word indeed".to_string()]; + let wrapped = wrap_text(&input, 20); + assert_eq!( + wrapped, + vec![ + "A word that is".to_string(), + "very-long-word".to_string(), + "indeed".to_string(), + ] + ); +} + +#[test] +fn wrap_text_does_not_insert_spaces_in_hyphenated_words() { + let input = vec![ + concat!( + "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec tincidunt ", + "elit-sed fermentum congue. Vivamus dictum nulla sed consectetur ", + "volutpat." + ) + .to_string(), + ]; + let wrapped = wrap_text(&input, 80); + assert_eq!( + wrapped, + vec![ + "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec tincidunt".to_string(), + "elit-sed fermentum congue. Vivamus dictum nulla sed consectetur volutpat.".to_string(), + ] + ); +} + +#[test] +fn wrap_text_preserves_code_spans() { + let input = vec![ + "with their own escaping rules. On Windows, scripts default to `powershell -Command` \ + unless the manifest's `interpreter` field overrides the setting." + .to_string(), + ]; + let wrapped = wrap_text(&input, 60); + assert_eq!( + wrapped, + vec![ + "with their own escaping rules. On Windows, scripts default".to_string(), + "to `powershell -Command` unless the manifest's".to_string(), + "`interpreter` field overrides the setting.".to_string(), + ] + ); +} + +#[test] +fn wrap_text_multiple_code_spans() { + let input = vec!["combine `foo bar` and `baz qux` in one line".to_string()]; + let wrapped = wrap_text(&input, 25); + assert_eq!( + wrapped, + vec![ + "combine `foo bar` and".to_string(), + "`baz qux` in one line".to_string(), + ] + ); +} + +#[test] +fn wrap_text_nested_backticks() { + let input = vec!["Use `` `code` `` to quote backticks".to_string()]; + let wrapped = wrap_text(&input, 20); + assert_eq!( + wrapped, + vec![ + "Use `` `code` `` to".to_string(), + "quote backticks".to_string() + ] + ); +} + +#[test] +fn wrap_text_unmatched_backticks() { + let input = vec!["This has a `dangling code span.".to_string()]; + let wrapped = wrap_text(&input, 20); + assert_eq!( + wrapped, + vec!["This has a".to_string(), "`dangling code span.".to_string()] + ); +} + +#[test] +fn wrap_text_preserves_links() { + let input = vec![ + "`falcon-pachinko` is an extension library for the".to_string(), + "[Falcon](https://falcon.readthedocs.io) web framework. It adds a structured".to_string(), + "approach to asynchronous WebSocket routing and background worker integration.".to_string(), + ]; + let wrapped = wrap_text(&input, 80); + let joined = wrapped.join("\n"); + assert_eq!(joined.matches("https://").count(), 1); + assert!( + wrapped + .iter() + .any(|l| l.contains("https://falcon.readthedocs.io")) + ); +}