From 6d47b9d3cf21607e9bb54b10880b7b55ed55006e Mon Sep 17 00:00:00 2001 From: Leynos Date: Tue, 29 Jul 2025 00:20:38 +0100 Subject: [PATCH 1/6] Prevent link punctuation wrapping --- src/wrap.rs | 4 ++++ tests/wrap.rs | 12 ++++++++++++ 2 files changed, 16 insertions(+) diff --git a/src/wrap.rs b/src/wrap.rs index 9cf3e670..6ce3bc65 100644 --- a/src/wrap.rs +++ b/src/wrap.rs @@ -106,6 +106,10 @@ fn parse_link_or_image(chars: &[char], mut i: usize) -> (String, usize) { } i += 1; } + // treat trailing punctuation as part of the link token + if i < chars.len() && matches!(chars[i], '.' | ',' | '!' | '?' | ':' | ';') { + i += 1; + } let tok: String = chars[start..i].iter().collect(); return (tok, i); } diff --git a/tests/wrap.rs b/tests/wrap.rs index 2ab02b19..6b25f22d 100644 --- a/tests/wrap.rs +++ b/tests/wrap.rs @@ -475,6 +475,18 @@ fn test_wrap_paragraph_with_nested_link() { ); } +/// Ensures punctuation immediately following a link remains attached when +/// wrapping lines. +#[test] +fn test_wrap_link_with_trailing_punctuation() { + let input = lines_vec![ + "[`rust-multithreaded-logging-framework-for-python-design.md`](./\ + rust-multithreaded-logging-framework-for-python-design.md).", + ]; + let output = process_stream(&input); + assert_eq!(output, input); +} + /// Regression test for wrapping list items that end with a full stop. /// /// The period following the inline code span should remain on the same line From 5f36a4488b40fea9fe9f337cbca071f3f506d881 Mon Sep 17 00:00:00 2001 From: Leynos Date: Wed, 30 Jul 2025 07:40:37 +0100 Subject: [PATCH 2/6] Handle trailing punctuation after links --- docs/architecture.md | 13 +++++++++++++ src/wrap.rs | 40 +++++++++++++++++++++++++++++++++------- tests/wrap.rs | 39 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 85 insertions(+), 7 deletions(-) diff --git a/docs/architecture.md b/docs/architecture.md index dbf2524e..358bf4bc 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -307,3 +307,16 @@ multibyte characters from causing unexpected wraps or truncation. Whenever wrapping logic examines the length of a token, it relies on `UnicodeWidthStr::width` to measure visible columns rather than byte length. + +## Link punctuation handling + +Trailing punctuation immediately following a Markdown link or image is +tokenised separately and grouped with the link when wrapping. This keeps +sentences like: + +```markdown +[link](path). +``` + +on a single line rather than splitting the punctuation onto the next line when +wrapping occurs. diff --git a/src/wrap.rs b/src/wrap.rs index 6ce3bc65..e8fe4b4d 100644 --- a/src/wrap.rs +++ b/src/wrap.rs @@ -106,10 +106,6 @@ fn parse_link_or_image(chars: &[char], mut i: usize) -> (String, usize) { } i += 1; } - // treat trailing punctuation as part of the link token - if i < chars.len() && matches!(chars[i], '.' | ',' | '!' | '?' | ':' | ';') { - i += 1; - } let tok: String = chars[start..i].iter().collect(); return (tok, i); } @@ -118,6 +114,13 @@ fn parse_link_or_image(chars: &[char], mut i: usize) -> (String, usize) { (tok, start + 1) } +fn is_trailing_punctuation(c: char) -> bool { + matches!( + c, + '.' | ',' | ';' | ':' | '!' | '?' | ')' | ']' | '"' | '\'' + ) +} + fn tokenize_inline(text: &str) -> Vec { let mut tokens = Vec::new(); let chars: Vec = text.chars().collect(); @@ -161,8 +164,16 @@ fn tokenize_inline(text: &str) -> Vec { i = end; } } else if c == '[' || (c == '!' && i + 1 < chars.len() && chars[i + 1] == '[') { - let (tok, new_i) = parse_link_or_image(&chars, i); + let (tok, mut new_i) = parse_link_or_image(&chars, i); tokens.push(tok); + let mut punct = String::new(); + while new_i < chars.len() && is_trailing_punctuation(chars[new_i]) { + punct.push(chars[new_i]); + new_i += 1; + } + if !punct.is_empty() { + tokens.push(punct); + } i = new_i; } else { let start = i; @@ -255,8 +266,23 @@ fn wrap_preserving_code(text: &str, width: usize) -> Vec { let mut current = String::new(); let mut current_width = 0; let mut last_split: Option = None; - for token in tokenize_inline(text) { - let token_width = UnicodeWidthStr::width(token.as_str()); + let tokens = tokenize_inline(text); + let mut i = 0; + while i < tokens.len() { + let mut token = tokens[i].clone(); + let mut token_width = UnicodeWidthStr::width(token.as_str()); + + if token.contains("](") && token.ends_with(')') { + let mut j = i + 1; + while j < tokens.len() && tokens[j].chars().all(is_trailing_punctuation) { + token.push_str(&tokens[j]); + token_width += UnicodeWidthStr::width(tokens[j].as_str()); + j += 1; + } + i = j; + } else { + i += 1; + } if current.is_empty() && token.len() == 1 && ".?!,:;".contains(token.as_str()) diff --git a/tests/wrap.rs b/tests/wrap.rs index 6b25f22d..c07a9768 100644 --- a/tests/wrap.rs +++ b/tests/wrap.rs @@ -487,6 +487,45 @@ fn test_wrap_link_with_trailing_punctuation() { assert_eq!(output, input); } +/// Test links followed by various punctuation marks remain on a single line. +#[rstest] +#[case(".")] +#[case(",")] +#[case(";")] +#[case(":")] +#[case("!")] +#[case("?")] +#[case("...")] +fn test_wrap_link_with_various_trailing_punctuation(#[case] punct: &str) { + let input = lines_vec![format!("[link](https://example.com){}", punct)]; + let output = process_stream(&input); + assert_eq!(output, input, "Failed for punctuation: {punct}"); +} + +/// Test a link at line end without trailing punctuation. +#[test] +fn test_wrap_link_at_line_end() { + let input = lines_vec!["Check out [link](https://example.com)"]; + let output = process_stream(&input); + assert_eq!(output, input); +} + +/// Test links containing punctuation within the link text. +#[test] +fn test_wrap_link_with_punctuation_in_text() { + let input = lines_vec!["[foo, bar!](https://example.com)"]; + let output = process_stream(&input); + assert_eq!(output, input); +} + +/// Test links containing punctuation inside the URL. +#[test] +fn test_wrap_link_with_punctuation_in_url() { + let input = lines_vec!["[link](https://example.com/foo,bar)"]; + let output = process_stream(&input); + assert_eq!(output, input); +} + /// Regression test for wrapping list items that end with a full stop. /// /// The period following the inline code span should remain on the same line From 3d9d19b1eb474c50fab5ecf1f12366d29824b962 Mon Sep 17 00:00:00 2001 From: Leynos Date: Wed, 30 Jul 2025 21:09:21 +0100 Subject: [PATCH 3/6] Refine link wrapping --- docs/architecture.md | 2 +- src/wrap.rs | 49 ++++++++++++++++++++++++++------------------ 2 files changed, 30 insertions(+), 21 deletions(-) diff --git a/docs/architecture.md b/docs/architecture.md index 358bf4bc..bc08d99a 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -318,5 +318,5 @@ sentences like: [link](path). ``` -on a single line rather than splitting the punctuation onto the next line when +on a single line, rather than splitting the punctuation onto the next line when wrapping occurs. diff --git a/src/wrap.rs b/src/wrap.rs index e8fe4b4d..8b904a8a 100644 --- a/src/wrap.rs +++ b/src/wrap.rs @@ -269,19 +269,14 @@ fn wrap_preserving_code(text: &str, width: usize) -> Vec { let tokens = tokenize_inline(text); let mut i = 0; while i < tokens.len() { - let mut token = tokens[i].clone(); - let mut token_width = UnicodeWidthStr::width(token.as_str()); + let mut j = i + 1; + let mut group_width = UnicodeWidthStr::width(tokens[i].as_str()); - if token.contains("](") && token.ends_with(')') { - let mut j = i + 1; + if tokens[i].contains("](") && tokens[i].ends_with(')') { while j < tokens.len() && tokens[j].chars().all(is_trailing_punctuation) { - token.push_str(&tokens[j]); - token_width += UnicodeWidthStr::width(tokens[j].as_str()); + group_width += UnicodeWidthStr::width(tokens[j].as_str()); j += 1; } - i = j; - } else { - i += 1; } if current.is_empty() && token.len() == 1 @@ -296,16 +291,20 @@ fn wrap_preserving_code(text: &str, width: usize) -> Vec { .push_str(&token); continue; } - if current_width + token_width <= width { - current.push_str(&token); - current_width += token_width; - if token.chars().all(char::is_whitespace) { - last_split = Some(current.len()); + + if current_width + group_width <= width { + for tok in &tokens[i..j] { + current.push_str(tok); + if tok.chars().all(char::is_whitespace) { + last_split = Some(current.len()); + } + current_width += UnicodeWidthStr::width(tok.as_str()); } + i = j; continue; } - if should_break_line(width, current_width + token_width, last_split) { + if should_break_line(width, current_width + group_width, last_split) { let pos = last_split.unwrap(); let line = current[..pos].to_string(); let mut rest = current[pos..].trim_start().to_string(); @@ -313,10 +312,12 @@ fn wrap_preserving_code(text: &str, width: usize) -> Vec { if !trimmed.is_empty() { lines.push(trimmed.to_string()); } - rest.push_str(&token); + for tok in &tokens[i..j] { + rest.push_str(tok); + } current = rest; current_width = UnicodeWidthStr::width(current.as_str()); - last_split = if token.chars().all(char::is_whitespace) { + last_split = if tokens[j - 1].chars().all(char::is_whitespace) { Some(current.len()) } else { None @@ -327,6 +328,7 @@ fn wrap_preserving_code(text: &str, width: usize) -> Vec { current_width = 0; last_split = None; } + i = j; continue; } @@ -336,11 +338,18 @@ fn wrap_preserving_code(text: &str, width: usize) -> Vec { } current.clear(); current_width = 0; + last_split = None; - if !token.chars().all(char::is_whitespace) { - current.push_str(&token); - current_width = token_width; + for tok in &tokens[i..j] { + if !tok.chars().all(char::is_whitespace) { + current.push_str(tok); + current_width += UnicodeWidthStr::width(tok.as_str()); + } + } + if j > i && tokens[j - 1].chars().all(char::is_whitespace) { + last_split = Some(current.len()); } + i = j; } let trimmed = current.trim_end(); if !trimmed.is_empty() { From bdf6ba1bc94dd1fccc148d6250c6c58d2c7f6e34 Mon Sep 17 00:00:00 2001 From: Payton McIntosh Date: Wed, 30 Jul 2025 21:38:59 +0100 Subject: [PATCH 4/6] Fixes token reference error in code wrapping logic Replaces incorrect local variable usage with correct token reference when appending punctuation to wrapped lines. Prevents potential logic errors when preserving code blocks containing punctuation. --- src/wrap.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/wrap.rs b/src/wrap.rs index 8b904a8a..25501de0 100644 --- a/src/wrap.rs +++ b/src/wrap.rs @@ -278,9 +278,10 @@ fn wrap_preserving_code(text: &str, width: usize) -> Vec { j += 1; } } + if current.is_empty() - && token.len() == 1 - && ".?!,:;".contains(token.as_str()) + && tokens[i].len() == 1 + && ".?!,:;".contains(tokens[i].as_str()) && lines .last() .is_some_and(|l: &String| l.trim_end().ends_with('`')) @@ -288,7 +289,7 @@ fn wrap_preserving_code(text: &str, width: usize) -> Vec { lines .last_mut() .expect("checked last line exists") - .push_str(&token); + .push_str(&tokens[i]); continue; } From 8e27057ea313d98fb3b9729c3ad997e62c54436a Mon Sep 17 00:00:00 2001 From: Leynos Date: Wed, 30 Jul 2025 21:57:48 +0100 Subject: [PATCH 5/6] Fix duplicate punctuation after inline code (#151) --- AGENTS.md | 7 +++---- src/wrap.rs | 1 + 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 06f65d12..0a41a4bb 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -29,10 +29,9 @@ documentation should omit examples where the example serves only to reiterate the test logic. - **Keep file size managable.** No single code file may be longer than 400 - lines. - Long switch statements or dispatch tables should be broken up by feature and - constituents colocated with targets. Large blocks of test data should be - moved to external data files. + lines. Long switch statements or dispatch tables should be broken up by + feature and constituents colocated with targets. Large blocks of test data + should be moved to external data files. ## Documentation Maintenance diff --git a/src/wrap.rs b/src/wrap.rs index 25501de0..7d331bd3 100644 --- a/src/wrap.rs +++ b/src/wrap.rs @@ -290,6 +290,7 @@ fn wrap_preserving_code(text: &str, width: usize) -> Vec { .last_mut() .expect("checked last line exists") .push_str(&tokens[i]); + i += 1; continue; } From 998875906fbb3f841e3656ce6a3f596be6e56054 Mon Sep 17 00:00:00 2001 From: Leynos Date: Wed, 30 Jul 2025 21:58:59 +0100 Subject: [PATCH 6/6] Use Oxford spelling "tokenized" as per en-GB-oxendict guidelines. Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- docs/architecture.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/architecture.md b/docs/architecture.md index bc08d99a..22fc4d56 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -311,7 +311,7 @@ Whenever wrapping logic examines the length of a token, it relies on ## Link punctuation handling Trailing punctuation immediately following a Markdown link or image is -tokenised separately and grouped with the link when wrapping. This keeps +tokenized separately and grouped with the link when wrapping. This keeps sentences like: ```markdown