Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,9 @@
documentation should omit examples where the example serves only to reiterate
the test logic.
- **Keep file size manageable.** No single code file may be longer than 400
lines.
Long switch statements or dispatch tables should be broken up by feature and
constituents colocated with targets. Large blocks of test data should be
moved to external data files.
lines. Long switch statements or dispatch tables should be broken up by
feature and constituents colocated with targets. Large blocks of test data
should be moved to external data files.

## Documentation Maintenance

Expand Down
13 changes: 13 additions & 0 deletions docs/architecture.md
Original file line number Diff line number Diff line change
Expand Up @@ -307,3 +307,16 @@ multibyte characters from causing unexpected wraps or truncation.

Whenever wrapping logic examines the length of a token, it relies on
`UnicodeWidthStr::width` to measure visible columns rather than byte length.

## Link punctuation handling

Trailing punctuation immediately following a Markdown link or image is
tokenized separately and grouped with the link when wrapping. This keeps
sentences like:

```markdown
[link](path).
```

on a single line, rather than splitting the punctuation onto the next line when
wrapping occurs.
75 changes: 58 additions & 17 deletions src/wrap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,13 @@ fn parse_link_or_image(chars: &[char], mut i: usize) -> (String, usize) {
(tok, start + 1)
}

/// Reports whether `c` is one of the punctuation characters that is kept
/// together with a preceding Markdown link or image.
///
/// The recognised characters are `.`, `,`, `;`, `:`, `!`, `?`, `)`, `]`,
/// `"` and `'`; every other character yields `false`.
fn is_trailing_punctuation(c: char) -> bool {
    // Fixed table of the accepted characters; membership is a linear scan
    // over ten entries.
    const TRAILING: &[char] = &['.', ',', ';', ':', '!', '?', ')', ']', '"', '\''];
    TRAILING.contains(&c)
}

fn tokenize_inline(text: &str) -> Vec<String> {
let mut tokens = Vec::new();
let chars: Vec<char> = text.chars().collect();
Expand Down Expand Up @@ -157,8 +164,16 @@ fn tokenize_inline(text: &str) -> Vec<String> {
i = end;
}
} else if c == '[' || (c == '!' && i + 1 < chars.len() && chars[i + 1] == '[') {
let (tok, new_i) = parse_link_or_image(&chars, i);
let (tok, mut new_i) = parse_link_or_image(&chars, i);
tokens.push(tok);
let mut punct = String::new();
while new_i < chars.len() && is_trailing_punctuation(chars[new_i]) {
punct.push(chars[new_i]);
new_i += 1;
}
if !punct.is_empty() {
tokens.push(punct);
}
i = new_i;
} else {
let start = i;
Expand Down Expand Up @@ -251,42 +266,60 @@ fn wrap_preserving_code(text: &str, width: usize) -> Vec<String> {
let mut current = String::new();
let mut current_width = 0;
let mut last_split: Option<usize> = None;
for token in tokenize_inline(text) {
let token_width = UnicodeWidthStr::width(token.as_str());
let tokens = tokenize_inline(text);
let mut i = 0;
while i < tokens.len() {
let mut j = i + 1;
let mut group_width = UnicodeWidthStr::width(tokens[i].as_str());

if tokens[i].contains("](") && tokens[i].ends_with(')') {
while j < tokens.len() && tokens[j].chars().all(is_trailing_punctuation) {
group_width += UnicodeWidthStr::width(tokens[j].as_str());
j += 1;
}
}

if current.is_empty()
&& token.len() == 1
&& ".?!,:;".contains(token.as_str())
&& tokens[i].len() == 1
&& ".?!,:;".contains(tokens[i].as_str())
&& lines
.last()
.is_some_and(|l: &String| l.trim_end().ends_with('`'))
{
lines
.last_mut()
.expect("checked last line exists")
.push_str(&token);
.push_str(&tokens[i]);
i += 1;
continue;
}
if current_width + token_width <= width {
current.push_str(&token);
current_width += token_width;
if token.chars().all(char::is_whitespace) {
last_split = Some(current.len());

if current_width + group_width <= width {
for tok in &tokens[i..j] {
current.push_str(tok);
if tok.chars().all(char::is_whitespace) {
last_split = Some(current.len());
}
current_width += UnicodeWidthStr::width(tok.as_str());
}
i = j;
continue;
}

if should_break_line(width, current_width + token_width, last_split) {
if should_break_line(width, current_width + group_width, last_split) {
let pos = last_split.unwrap();
let line = current[..pos].to_string();
let mut rest = current[pos..].trim_start().to_string();
let trimmed = line.trim_end();
if !trimmed.is_empty() {
lines.push(trimmed.to_string());
}
rest.push_str(&token);
for tok in &tokens[i..j] {
rest.push_str(tok);
}
current = rest;
current_width = UnicodeWidthStr::width(current.as_str());
last_split = if token.chars().all(char::is_whitespace) {
last_split = if tokens[j - 1].chars().all(char::is_whitespace) {
Some(current.len())
} else {
None
Expand All @@ -297,6 +330,7 @@ fn wrap_preserving_code(text: &str, width: usize) -> Vec<String> {
current_width = 0;
last_split = None;
}
i = j;
continue;
}

Expand All @@ -306,11 +340,18 @@ fn wrap_preserving_code(text: &str, width: usize) -> Vec<String> {
}
current.clear();
current_width = 0;
last_split = None;

if !token.chars().all(char::is_whitespace) {
current.push_str(&token);
current_width = token_width;
for tok in &tokens[i..j] {
if !tok.chars().all(char::is_whitespace) {
current.push_str(tok);
current_width += UnicodeWidthStr::width(tok.as_str());
}
}
if j > i && tokens[j - 1].chars().all(char::is_whitespace) {
last_split = Some(current.len());
}
i = j;
}
let trimmed = current.trim_end();
if !trimmed.is_empty() {
Expand Down
51 changes: 51 additions & 0 deletions tests/wrap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,57 @@ fn test_wrap_paragraph_with_nested_link() {
);
}

/// Verifies that a full stop directly after a long link stays attached to the
/// link: the stream processor must return the line unchanged.
#[test]
fn test_wrap_link_with_trailing_punctuation() {
    let expected = lines_vec![
        "[`rust-multithreaded-logging-framework-for-python-design.md`](./\
         rust-multithreaded-logging-framework-for-python-design.md).",
    ];
    let wrapped = process_stream(&expected);
    assert_eq!(wrapped, expected);
}
Comment thread
leynos marked this conversation as resolved.

/// Checks, for each punctuation suffix in the case table, that a link
/// immediately followed by that suffix is returned unchanged.
#[rstest]
#[case(".")]
#[case(",")]
#[case(";")]
#[case(":")]
#[case("!")]
#[case("?")]
#[case("...")]
fn test_wrap_link_with_various_trailing_punctuation(#[case] punct: &str) {
    let expected = lines_vec![format!("[link](https://example.com){}", punct)];
    let wrapped = process_stream(&expected);
    assert_eq!(wrapped, expected, "Failed for punctuation: {punct}");
}

/// Checks that a line ending in a link, with no punctuation after it, is
/// returned unchanged.
#[test]
fn test_wrap_link_at_line_end() {
    let expected = lines_vec!["Check out [link](https://example.com)"];
    let wrapped = process_stream(&expected);
    assert_eq!(wrapped, expected);
}

/// Checks that punctuation inside the link text (between the square
/// brackets) leaves the line unchanged.
#[test]
fn test_wrap_link_with_punctuation_in_text() {
    let expected = lines_vec!["[foo, bar!](https://example.com)"];
    let wrapped = process_stream(&expected);
    assert_eq!(wrapped, expected);
}

/// Checks that punctuation inside the link target (between the parentheses)
/// leaves the line unchanged.
#[test]
fn test_wrap_link_with_punctuation_in_url() {
    let expected = lines_vec!["[link](https://example.com/foo,bar)"];
    let wrapped = process_stream(&expected);
    assert_eq!(wrapped, expected);
}

/// Regression test for wrapping list items that end with a full stop.
///
/// The period following the inline code span should remain on the same line
Expand Down
Loading