diff --git a/src/html.rs b/src/html.rs
index f495c185..d3ab29b7 100644
--- a/src/html.rs
+++ b/src/html.rs
@@ -29,7 +29,10 @@ fn node_text(handle: &Handle) -> String {
let mut out = String::new();
let mut last_space = false;
collect_text(handle, &mut out, &mut last_space);
- out.trim().to_string()
+ if last_space {
+ out.push(' ');
+ }
+ out.trim_start().to_string()
}
fn is_ignored_tag(tag: &str) -> bool {
@@ -178,11 +181,7 @@ fn table_lines_to_markdown(lines: &[String]) -> Vec<String> {
.first()
.map(|l| l.chars().take_while(|c| c.is_whitespace()).collect())
.unwrap_or_default();
- let html: String = lines
- .iter()
- .map(|l| l.trim_end())
- .collect::<Vec<_>>()
- .join("\n");
+ let html: String = lines.join("\n");
let opts = ParseOpts::default();
let dom: RcDom = parse_document(RcDom::default(), opts).one(html);
diff --git a/src/process.rs b/src/process.rs
index 81f6e6b7..24fd3135 100644
--- a/src/process.rs
+++ b/src/process.rs
@@ -88,7 +88,7 @@ fn handle_table_line(
) -> bool {
if line.trim_start().starts_with('|') {
*in_table = true;
- buf.push(line.trim_end().to_string());
+ buf.push(line.to_string());
return true;
}
if line.trim().is_empty() {
@@ -98,7 +98,7 @@ fn handle_table_line(
return false;
}
if *in_table && (line.contains('|') || crate::table::SEP_RE.is_match(line.trim())) {
- buf.push(line.trim_end().to_string());
+ buf.push(line.to_string());
return true;
}
if *in_table {
@@ -112,7 +112,7 @@ fn handle_table_line(
flush_buffer(buf, in_table, out);
return false;
}
- buf.push(line.trim_end().to_string());
+ buf.push(line.to_string());
return true;
}
false
diff --git a/src/reflow.rs b/src/reflow.rs
index b3b244ee..a67a9158 100644
--- a/src/reflow.rs
+++ b/src/reflow.rs
@@ -26,7 +26,7 @@ fn collect_cells(chunks: &[&str]) -> Vec<String> {
for (idx, chunk) in chunks.iter().enumerate() {
let mut ch = (*chunk).to_string();
if idx != chunks.len() - 1 {
- ch = ch.trim_end().to_string() + " |ROW_END|";
+ ch.push_str(" |ROW_END|");
}
cells.extend(split_cells(&ch));
}
diff --git a/src/wrap.rs b/src/wrap.rs
index 48b21563..03b4d8a5 100644
--- a/src/wrap.rs
+++ b/src/wrap.rs
@@ -71,7 +71,6 @@ fn is_trailing_punct(c: char) -> bool {
fn extend_punctuation(tokens: &[String], mut j: usize, width: &mut usize) -> usize {
use unicode_width::UnicodeWidthStr;
-
while j < tokens.len() && tokens[j].chars().all(is_trailing_punct) {
*width += UnicodeWidthStr::width(tokens[j].as_str());
j += 1;
@@ -79,9 +78,44 @@ fn extend_punctuation(tokens: &[String], mut j: usize, width: &mut usize) -> usi
j
}
-fn wrap_preserving_code(text: &str, width: usize) -> Vec<String> {
+/// Extends a token group across a code span delimited by lone backtick tokens.
+///
+/// `tokens[i]` must be the opening `` ` `` (checked in debug builds only). Every
+/// token after it, up to and including the next lone `` ` `` token, has its
+/// display width added to `width`; the opener's own width is not added here.
+/// Once the closer is found, any trailing punctuation tokens are absorbed
+/// through `extend_punctuation`.
+///
+/// Returns the index of the first token after the group. If no closing
+/// backtick follows a valid opener, every remaining token is counted and
+/// `tokens.len()` is returned.
+#[inline]
+fn merge_code_span(tokens: &[String], i: usize, width: &mut usize) -> usize {
use unicode_width::UnicodeWidthStr;
+    debug_assert!(
+        tokens[i] == "`",
+        "merge_code_span requires a single backtick opener"
+    );
+    for (idx, tok) in tokens.iter().enumerate().skip(i + 1) {
+        // Interior tokens and the closer alike count towards the group width.
+        *width += UnicodeWidthStr::width(tok.as_str());
+        if tok.as_str() == "`" {
+            // Closer found: glue any punctuation that follows it to the span.
+            return extend_punctuation(tokens, idx + 1, width);
+        }
+    }
+    // Unterminated span: the scan ran off the end of the token list.
+    tokens.len().max(i + 1)
+}
+
+/// Moves the text accumulated in `current` onto the end of `lines`.
+///
+/// The line is stored exactly as built (no trimming) and is moved rather than
+/// copied. `current` is left empty but backed by a new buffer allocated with
+/// the capacity the old one had, so the next line can grow to a similar
+/// length without reallocating.
+#[inline]
+fn flush_current(lines: &mut Vec<String>, current: &mut String) {
+    let fresh = String::with_capacity(current.capacity());
+    lines.push(std::mem::replace(current, fresh));
+}
+/// Appends the whitespace-only `token` to `current`, then flushes `current`
+/// into `lines` via `flush_current`, so the pushed line keeps that trailing
+/// whitespace instead of having it trimmed.
+///
+/// `token` consisting solely of whitespace is a precondition that is checked
+/// in debug builds only.
+fn flush_trailing_whitespace(lines: &mut Vec<String>, current: &mut String, token: &str) {
+    debug_assert!(
+        token.chars().all(char::is_whitespace),
+        "expected whitespace token"
+    );
+    current.push_str(token);
+    flush_current(lines, current);
+}
+
+fn wrap_preserving_code(text: &str, width: usize) -> Vec<String> {
+ use unicode_width::UnicodeWidthStr;
let mut lines = Vec::new();
let mut current = String::new();
let mut current_width = 0;
@@ -91,11 +125,13 @@ fn wrap_preserving_code(text: &str, width: usize) -> Vec<String> {
while i < tokens.len() {
let mut j = i + 1;
let mut group_width = UnicodeWidthStr::width(tokens[i].as_str());
+ if tokens[i] == "`" {
+ j = merge_code_span(&tokens, i, &mut group_width);
+ }
if tokens[i].contains("](") && tokens[i].ends_with(')') {
j = extend_punctuation(&tokens, j, &mut group_width);
}
-
if tokens[i].starts_with('`') && tokens[i].ends_with('`') {
// Keep trailing punctuation glued to inline code spans.
j = extend_punctuation(&tokens, j, &mut group_width);
@@ -115,7 +151,6 @@ fn wrap_preserving_code(text: &str, width: usize) -> Vec<String> {
i += 1;
continue;
}
-
if current_width + group_width <= width {
for tok in &tokens[i..j] {
current.push_str(tok);
@@ -132,6 +167,7 @@ fn wrap_preserving_code(text: &str, width: usize) -> Vec<String> {
let pos = last_split.unwrap();
let line = current[..pos].to_string();
let mut rest = current[pos..].trim_start().to_string();
+ // Mid-wrap lines discard trailing spaces.
let trimmed = line.trim_end();
if !trimmed.is_empty() {
lines.push(trimmed.to_string());
@@ -147,6 +183,7 @@ fn wrap_preserving_code(text: &str, width: usize) -> Vec<String> {
None
};
if current_width > width {
+ // Mid-wrap overflow flush trims trailing spaces.
lines.push(current.trim_end().to_string());
current.clear();
current_width = 0;
@@ -155,12 +192,20 @@ fn wrap_preserving_code(text: &str, width: usize) -> Vec<String> {
i = j;
continue;
}
-
- let trimmed = current.trim_end();
- if !trimmed.is_empty() {
- lines.push(trimmed.to_string());
+ if tokens[i].chars().all(char::is_whitespace) && j == tokens.len() {
+ // Preserve trailing spaces that forced a flush.
+ if !current.is_empty() {
+ flush_trailing_whitespace(&mut lines, &mut current, &tokens[i]);
+ }
+ current_width = 0;
+ last_split = None;
+ i = j;
+ continue;
+ }
+ if !current.is_empty() {
+ // Move the finished line into `lines`; `current` restarts empty with the same capacity.
+ flush_current(&mut lines, &mut current);
}
- current.clear();
current_width = 0;
last_split = None;
@@ -218,7 +263,6 @@ fn append_wrapped_with_prefix(
repeat_prefix: bool,
) {
use unicode_width::UnicodeWidthStr;
-
let prefix_width = UnicodeWidthStr::width(prefix);
let available = width.saturating_sub(prefix_width).max(1);
let indent_str: String = prefix.chars().take_while(|c| c.is_whitespace()).collect();
diff --git a/src/wrap/tests.rs b/src/wrap/tests.rs
index 8c10c636..9c9ed54e 100644
--- a/src/wrap/tests.rs
+++ b/src/wrap/tests.rs
@@ -6,6 +6,7 @@
use rstest::rstest;
use super::super::*;
+use super::wrap_preserving_code;
#[test]
fn wrap_text_preserves_hyphenated_words() {
@@ -113,23 +114,31 @@ fn wrap_text_preserves_links() {
}
#[rstest]
-#[case("ends with space ", 80, &["ends with space "])]
-#[case("four spaces ", 80, &["four spaces "])]
-#[case(" ", 80, &[" "])]
-#[case("word1 word2 ", 8, &["word1", "word2 "])]
-fn wrap_preserving_code_keeps_trailing_spaces(
+#[case("trail ", 80, &["trail "])]
+#[case("`code span` ", 12, &["`code span` "])]
+#[case("foo ", 3, &["foo "])]
+#[case("x ", 1, &["x "])]
+fn preserves_trailing_spaces(#[case] input: &str, #[case] width: usize, #[case] expected: &[&str]) {
+ let out = wrap_preserving_code(input, width);
+ assert_eq!(
+ out,
+ expected.iter().map(|&s| s.to_string()).collect::<Vec<_>>()
+ );
+}
+
+#[rstest]
+#[case("aaaaaaaaaaaa", 5, &["aaaaaaaaaaaa"])] // forced flush without split
+#[case("abcde", 3, &["abcde"])]
+#[case("`codespan`", 6, &["`codespan`"])]
+fn no_split_forced_flush_no_trim(
#[case] input: &str,
#[case] width: usize,
#[case] expected: &[&str],
) {
- // The final flush must not trim trailing spaces, even after wrapping.
- let lines = super::wrap_preserving_code(input, width);
+ let out = wrap_preserving_code(input, width);
assert_eq!(
- lines,
- expected
- .iter()
- .map(ToString::to_string)
- .collect::<Vec<_>>()
+ out,
+ expected.iter().map(|&s| s.to_string()).collect::<Vec<_>>()
);
}
diff --git a/tests/table/convert_html.rs b/tests/table/convert_html.rs
index 1ace15a5..21f57e24 100644
--- a/tests/table/convert_html.rs
+++ b/tests/table/convert_html.rs
@@ -62,3 +62,18 @@ fn test_convert_html_table_bold_header() {
let expected: Vec<String> = include_lines!("data/bold_header_expected.txt");
assert_eq!(convert_html_tables(&input), expected);
}
+// Runs `convert_html_tables` on `input` and expects a one-column Markdown
+// table: header `H`, a separator row, and one body row `cell`. Per its name,
+// the test is meant to cover trailing whitespace inside table cells.
+//
+// NOTE(review): the only input line here is an empty string literal, which
+// cannot yield the expected table. The HTML markup (presumably a `<table>`
+// with a header cell `H` and a data cell `cell` followed by whitespace) looks
+// to have been stripped from this copy of the patch; any significant spacing
+// in the expected rows may have been collapsed the same way. Restore both
+// from the original commit before relying on this test.
+#[test]
+fn preserves_trailing_spaces_in_cells() {
+ let input = lines_vec![
+ "",
+ ];
+ let expected = lines_vec![
+ "| H |",
+ "| --- |",
+ "| cell |",
+ ];
+ assert_eq!(convert_html_tables(&input), expected);
+}