From 55b5a15f862341d595ad00ac195cfcf60c3ce5db Mon Sep 17 00:00:00 2001 From: Payton McIntosh Date: Sat, 18 Apr 2026 16:17:57 +0100 Subject: [PATCH 1/6] Address review feedback for wrapping and footnotes Centralize mutable state in the list, HTML-table, footnote, and paragraph-wrapping flows so the behaviour is easier to reason about from one place. Fix the cargo-binstall Linux override, remove production `expect` usage from the reviewed paths, normalize whitespace-only wrapped lines, and add regression tests for mixed code-emphasis affixes, Unicode-width indentation, and indented multi-line HTML tables. --- Cargo.toml | 2 +- src/code_emphasis.rs | 11 ++++ src/footnotes/renumber.rs | 78 ++++++++++------------- src/html.rs | 56 ++++++----------- src/lists.rs | 33 ++++++---- src/wrap.rs | 64 +++++++++---------- src/wrap/fence.rs | 5 +- src/wrap/inline.rs | 5 +- src/wrap/line_buffer.rs | 4 ++ src/wrap/paragraph.rs | 122 +++++++++++++++++------------------- src/wrap/tests.rs | 29 +++++---- tests/table/convert_html.rs | 16 +++++ 12 files changed, 210 insertions(+), 215 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index acb11865..4b6c0ae2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,7 +13,7 @@ the `--wrap` option is used, it also wraps paragraphs and list items to 80 colum [package.metadata.binstall.overrides.'cfg(all(target_os = "linux", any(target_arch = "x86_64", target_arch = "aarch64"), target_env = "gnu"))'] pkg-url = "{ repo }/releases/download/v{ version }/{ name }-{ version }-{ target }.tar.gz" -bin-dir = "{ bin }{ binary-ext }" +bin-dir = "." pkg-fmt = "tgz" [dependencies] diff --git a/src/code_emphasis.rs b/src/code_emphasis.rs index b5acd194..84d48b6c 100644 --- a/src/code_emphasis.rs +++ b/src/code_emphasis.rs @@ -231,4 +231,15 @@ mod tests { let input = vec!["before `code` after".to_string()]; assert_eq!(fix_code_emphasis(&input), input); } + + #[test] + fn consume_code_affixes_clears_mixed_pending_prefix() { + let mut tokens = vec![Token::Text("*lead*tail")].into_iter().peekable(); + let mut pending = "**"; + + let (prefix, suffix, modified) = consume_code_affixes(&mut tokens, &mut pending); + + assert_eq!((prefix, suffix, modified), ("", "", true)); + assert_eq!(tokens.next(), Some(Token::Text("lead*tail"))); + } } diff --git a/src/footnotes/renumber.rs b/src/footnotes/renumber.rs index 203c4d59..98471eed 100644 --- a/src/footnotes/renumber.rs +++ b/src/footnotes/renumber.rs @@ -279,16 +279,14 @@ struct DefinitionUpdates { is_definition_line: Vec, } -struct DefinitionScanContext<'a> { +struct DefinitionScanState<'a> { mapping: &'a mut HashMap, next_number: &'a mut usize, numeric_list_range: Option<(usize, usize)>, skip_numeric_conversion: bool, -} - -struct DefinitionAccumulator { definitions: Vec, is_definition_line: Vec, + numeric_candidates: Vec, } fn assign_new_number( @@ -327,7 +325,7 @@ fn definition_line_from_parts( let rewritten_rest = rewrite_tokens(parts.rest, mapping); let mut line = String::with_capacity(parts.prefix.len() + rewritten_rest.len() + 8); line.push_str(parts.prefix); - write!(&mut line, "[^{new_number}]:").expect("write to string cannot fail"); + let _ = write!(&mut line, "[^{new_number}]:"); line.push_str(&rewritten_rest); DefinitionLine { index, @@ -341,12 +339,8 @@ fn numeric_candidate_from_line(line: &str, index: usize) -> Option().ok()?; let indent = caps.name("indent").map_or("", |m| m.as_str()).to_string(); let rest = caps.name("rest").map_or("", |m| m.as_str()).to_string(); - let num_match = caps - .name("num") - .expect("numeric list capture missing number"); - let rest_match = caps - .name("rest") - .expect("numeric list capture missing rest"); + let num_match = caps.name("num")?; + let rest_match = caps.name("rest")?; let whitespace = line[num_match.end() + 1..rest_match.start()].to_string(); Some(NumericCandidate { index, @@ -357,15 +351,7 @@ fn numeric_candidate_from_line(line: &str, index: usize) -> Option, -) -> (DefinitionAccumulator, Vec) { - let mut acc = DefinitionAccumulator { - definitions: Vec::new(), - is_definition_line: vec![false; lines.len()], - }; - let mut numeric_candidates = Vec::new(); +fn collect_scan_updates(lines: &[String], state: &mut DefinitionScanState<'_>) { let mut in_fence = false; for (index, line) in lines.iter().enumerate() { @@ -378,52 +364,49 @@ fn collect_scan_updates( } if let Some(parts) = parse_definition(line) { - acc.definitions.push(definition_line_from_parts( + state.definitions.push(definition_line_from_parts( index, parts, - ctx.mapping, - ctx.next_number, + state.mapping, + state.next_number, )); - acc.is_definition_line[index] = true; + state.is_definition_line[index] = true; continue; } - if !should_convert_numeric_line(index, ctx.numeric_list_range, ctx.skip_numeric_conversion) - { + if !should_convert_numeric_line( + index, + state.numeric_list_range, + state.skip_numeric_conversion, + ) { continue; } - if ctx.mapping.is_empty() && acc.definitions.is_empty() { + if state.mapping.is_empty() && state.definitions.is_empty() { continue; } if let Some(candidate) = numeric_candidate_from_line(line, index) { - numeric_candidates.push(candidate); + state.numeric_candidates.push(candidate); } } - - (acc, numeric_candidates) } -fn finalize_numeric_candidates( - numeric_candidates: Vec, - ctx: &mut DefinitionScanContext<'_>, - acc: &mut DefinitionAccumulator, -) { - for candidate in numeric_candidates.into_iter().rev() { - let new_number = assign_new_number(ctx.mapping, candidate.number, ctx.next_number); - let rewritten_rest = rewrite_tokens(&candidate.rest, ctx.mapping); +fn finalize_numeric_candidates(state: &mut DefinitionScanState<'_>) { + for candidate in state.numeric_candidates.drain(..).rev() { + let new_number = assign_new_number(state.mapping, candidate.number, state.next_number); + let rewritten_rest = rewrite_tokens(&candidate.rest, state.mapping); let mut line = String::with_capacity( candidate.indent.len() + candidate.whitespace.len() + rewritten_rest.len() + 8, ); line.push_str(&candidate.indent); - write!(&mut line, "[^{new_number}]:").expect("write to string cannot fail"); + let _ = write!(&mut line, "[^{new_number}]:"); line.push_str(&candidate.whitespace); line.push_str(&rewritten_rest); - acc.definitions.push(DefinitionLine { + state.definitions.push(DefinitionLine { index: candidate.index, new_number, line, }); - acc.is_definition_line[candidate.index] = true; + state.is_definition_line[candidate.index] = true; } } @@ -436,18 +419,21 @@ fn collect_definition_updates( let skip_numeric_conversion = numeric_list_range .as_ref() .is_some_and(|(start, _)| has_existing_footnote_block(lines, *start)); - let mut ctx = DefinitionScanContext { + let mut state = DefinitionScanState { mapping, next_number: &mut next_number, numeric_list_range, skip_numeric_conversion, + definitions: Vec::new(), + is_definition_line: vec![false; lines.len()], + numeric_candidates: Vec::new(), }; - let (mut acc, numeric_candidates) = collect_scan_updates(lines, &mut ctx); - finalize_numeric_candidates(numeric_candidates, &mut ctx, &mut acc); + collect_scan_updates(lines, &mut state); + finalize_numeric_candidates(&mut state); DefinitionUpdates { - definitions: acc.definitions, - is_definition_line: acc.is_definition_line, + definitions: state.definitions, + is_definition_line: state.is_definition_line, } } diff --git a/src/html.rs b/src/html.rs index b8c6e326..e1493e11 100644 --- a/src/html.rs +++ b/src/html.rs @@ -208,43 +208,34 @@ fn table_lines_to_markdown(lines: &[String]) -> Vec { out } -fn append_html_table_line(line: &str, buf: &mut Vec, depth: &mut usize) { - buf.push(line.to_string()); - *depth += TABLE_START_RE.find_iter(line).count(); - if TABLE_END_RE.is_match(line) { - *depth = depth.saturating_sub(TABLE_END_RE.find_iter(line).count()); - } -} - -fn flush_completed_html_table(buf: &mut Vec, depth: usize, out: &mut Vec) -> bool { - if depth != 0 { - return false; - } - out.extend(table_lines_to_markdown(buf)); - buf.clear(); - true -} - #[derive(Default)] struct HtmlTableState { buf: Vec, depth: usize, - in_html: bool, } impl HtmlTableState { + fn in_html(&self) -> bool { !self.buf.is_empty() } + fn flush_raw(&mut self, out: &mut Vec) { if !self.buf.is_empty() { out.append(&mut self.buf); } self.depth = 0; - self.in_html = false; } fn push_html_line(&mut self, line: &str, out: &mut Vec) { - append_html_table_line(line, &mut self.buf, &mut self.depth); - if flush_completed_html_table(&mut self.buf, self.depth, out) { - self.in_html = false; + let trimmed = line.trim_start(); + self.buf.push(line.to_string()); + self.depth += TABLE_START_RE.find_iter(trimmed).count(); + if TABLE_END_RE.is_match(line) { + self.depth = self + .depth + .saturating_sub(TABLE_END_RE.find_iter(line).count()); + } + if self.depth == 0 { + out.extend(table_lines_to_markdown(&self.buf)); + self.buf.clear(); } } } @@ -274,22 +265,18 @@ impl HtmlTableState { /// ``` pub(crate) fn html_table_to_markdown(lines: &[String]) -> Vec { let mut out = Vec::new(); - let mut buf = Vec::new(); - let mut depth = 0usize; + let mut html_state = HtmlTableState::default(); for line in lines { - if depth > 0 || TABLE_START_RE.is_match(line.trim_start()) { - append_html_table_line(line, &mut buf, &mut depth); - let _ = flush_completed_html_table(&mut buf, depth, &mut out); + if html_state.in_html() || TABLE_START_RE.is_match(line.trim_start()) { + html_state.push_html_line(line, &mut out); continue; } out.push(line.clone()); } - if !buf.is_empty() { - out.extend(buf); - } + html_state.flush_raw(&mut out); out } @@ -325,7 +312,7 @@ pub fn convert_html_tables(lines: &[String]) -> Vec { for line in lines { if is_fence(line).is_some() { - if html_state.in_html { + if html_state.in_html() { html_state.flush_raw(&mut out); } in_code = !in_code; @@ -338,13 +325,12 @@ pub fn convert_html_tables(lines: &[String]) -> Vec { continue; } - if html_state.in_html { + if html_state.in_html() { html_state.push_html_line(line, &mut out); continue; } if TABLE_START_RE.is_match(line.trim_start()) { - html_state.in_html = true; html_state.push_html_line(line, &mut out); continue; } @@ -352,9 +338,7 @@ pub fn convert_html_tables(lines: &[String]) -> Vec { out.push(line.clone()); } - if !html_state.buf.is_empty() { - out.extend(html_state.buf); - } + html_state.flush_raw(&mut out); out } diff --git a/src/lists.rs b/src/lists.rs index 4fdd5748..98a44c4b 100644 --- a/src/lists.rs +++ b/src/lists.rs @@ -61,12 +61,18 @@ fn is_plain_paragraph_line(line: &str) -> bool { ) } +#[derive(Default)] struct ListState { indent_stack: Vec, counters: HashMap, } impl ListState { + fn reset(&mut self) { + self.indent_stack.clear(); + self.counters.clear(); + } + fn prune_deeper(&mut self, indent: usize, inclusive: bool) { prune_deeper( indent, @@ -76,6 +82,17 @@ impl ListState { ); } + fn next_number(&mut self, indent: usize) -> usize { + self.prune_deeper(indent, false); + if self.indent_stack.last().is_none_or(|&d| d < indent) { + self.indent_stack.push(indent); + } + let num = self.counters.entry(indent).or_insert(1); + let current = *num; + *num += 1; + current + } + fn handle_paragraph_restart(&mut self, indent: usize, line: &str, prev_blank: bool) -> bool { let inclusive = prev_blank && self @@ -97,10 +114,7 @@ impl ListState { #[must_use] pub fn renumber_lists(lines: &[String]) -> Vec { let mut out = Vec::with_capacity(lines.len()); - let mut state = ListState { - indent_stack: Vec::new(), - counters: HashMap::new(), - }; + let mut state = ListState::default(); // Track fenced code blocks consistently across list processing. let mut fences = FenceTracker::default(); #[allow(clippy::unnecessary_map_or)] @@ -123,13 +137,7 @@ pub fn renumber_lists(lines: &[String]) -> Vec { continue; } if let Some((indent, indent_str, sep, rest)) = parse_numbered(line) { - state.prune_deeper(indent, false); - if state.indent_stack.last().is_none_or(|&d| d < indent) { - state.indent_stack.push(indent); - } - let num = state.counters.entry(indent).or_insert(1); - let current = *num; - *num += 1; + let current = state.next_number(indent); out.push(format!("{indent_str}{current}.{sep}{rest}")); prev_blank = false; continue; @@ -141,8 +149,7 @@ pub fn renumber_lists(lines: &[String]) -> Vec { let indent_str = &line[..indent_end]; let indent = indent_len(indent_str); if HEADING_RE.is_match(line) || THEMATIC_BREAK_RE.is_match(line.trim_end()) { - state.indent_stack.clear(); - state.counters.clear(); + state.reset(); out.push(line.clone()); prev_blank = false; continue; diff --git a/src/wrap.rs b/src/wrap.rs index 4499eb62..38cb63cd 100644 --- a/src/wrap.rs +++ b/src/wrap.rs @@ -8,8 +8,6 @@ //! The [`Token`] enum and [`tokenize_markdown`] function are public so callers //! can perform custom token-based processing. -use std::borrow::Cow; - mod block; mod fence; mod inline; @@ -19,7 +17,7 @@ mod tokenize; use block::{BLOCKQUOTE_RE, BULLET_RE, FOOTNOTE_RE}; pub(crate) use block::{BlockKind, classify_block}; pub use fence::{FenceTracker, is_fence}; -use paragraph::{ParagraphState, ParagraphWriter, PrefixLine}; +use paragraph::{ParagraphWriter, PrefixLine}; /// Token emitted by the `tokenize::segment_inline` parser and used by /// higher-level wrappers. /// @@ -48,51 +46,44 @@ fn is_indented_code_line(line: &str) -> bool { indent_width >= 4 && line.chars().any(|c| !c.is_whitespace()) } -fn is_table_or_separator(line: &str) -> bool { - line.trim_start().starts_with('|') || crate::table::SEP_RE.is_match(line.trim()) -} - fn is_passthrough_block(line: &str) -> bool { - is_table_or_separator(line) + line.trim_start().starts_with('|') + || crate::table::SEP_RE.is_match(line.trim()) || matches!( classify_block(line), Some(BlockKind::Heading | BlockKind::MarkdownlintDirective) ) - || line.trim().is_empty() || is_indented_code_line(line) } fn prefix_line(line: &str) -> Option> { if let Some(cap) = BULLET_RE.captures(line) { - let prefix = cap.get(1).expect("bullet regex capture").as_str(); - let rest = cap.get(2).expect("bullet regex remainder capture").as_str(); + let prefix = cap.get(1).map(|m| m.as_str())?; + let rest = cap.get(2).map(|m| m.as_str())?; return Some(PrefixLine { - prefix: Cow::Borrowed(prefix), + prefix: prefix.to_string(), rest, repeat_prefix: false, }); } if let Some(cap) = FOOTNOTE_RE.captures(line) { - let prefix = cap.get(1).expect("footnote prefix capture").as_str(); - let marker = cap.get(2).expect("footnote marker capture").as_str(); - let rest = cap - .get(3) - .expect("footnote regex remainder capture") - .as_str(); + let prefix = cap.get(1).map(|m| m.as_str())?; + let marker = cap.get(2).map(|m| m.as_str())?; + let rest = cap.get(3).map(|m| m.as_str())?; return Some(PrefixLine { - prefix: Cow::Owned(format!("{prefix}{marker}")), + prefix: format!("{prefix}{marker}"), rest, repeat_prefix: false, }); } - BLOCKQUOTE_RE.captures(line).map(|cap| PrefixLine { - prefix: Cow::Borrowed(cap.get(1).expect("blockquote prefix capture").as_str()), - rest: cap - .get(2) - .expect("blockquote regex remainder capture") - .as_str(), + let cap = BLOCKQUOTE_RE.captures(line)?; + let prefix = cap.get(1).map(|m| m.as_str())?; + let rest = cap.get(2).map(|m| m.as_str())?; + Some(PrefixLine { + prefix: prefix.to_string(), + rest, repeat_prefix: true, }) } @@ -117,43 +108,44 @@ fn line_break_parts(line: &str) -> (String, bool) { } /// Wrap text lines to the given width. -/// -/// # Panics -/// Panics if regex captures fail unexpectedly. #[must_use] pub fn wrap_text(lines: &[String], width: usize) -> Vec { let mut out = Vec::new(); - let mut state = ParagraphState::default(); let mut writer = ParagraphWriter::new(&mut out, width); // Track fenced code blocks so wrapping honours shared fence semantics. let mut fence_tracker = FenceTracker::default(); for line in lines { - if fence::handle_fence_line(line, &mut writer, &mut state, &mut fence_tracker) { + if fence::handle_fence_line(line, &mut writer, &mut fence_tracker) { continue; } if fence_tracker.in_fence() { - writer.push_verbatim(&mut state, line); + writer.push_verbatim(line); + continue; + } + + if line.trim().is_empty() { + writer.push_blank_line(); continue; } if is_passthrough_block(line) { - writer.push_verbatim(&mut state, line); + writer.push_verbatim(line); continue; } if let Some(prefix_line) = prefix_line(line) { - writer.handle_prefix_line(&mut state, &prefix_line); + writer.handle_prefix_line(&prefix_line); continue; } - state.note_indent(line); + writer.note_indent(line); let (text, hard_break) = line_break_parts(line); - state.push(text, hard_break); + writer.push_wrapped(text, hard_break); } - writer.flush_paragraph(&mut state); + writer.flush_paragraph(); out } diff --git a/src/wrap/fence.rs b/src/wrap/fence.rs index 1eaea4f4..259b8185 100644 --- a/src/wrap/fence.rs +++ b/src/wrap/fence.rs @@ -2,7 +2,7 @@ use regex::Regex; -use super::paragraph::{ParagraphState, ParagraphWriter}; +use super::paragraph::ParagraphWriter; pub(super) static FENCE_RE: std::sync::LazyLock = // Capture: indent, fence run of 3+ backticks/tilde, and the full info string (incl. leading @@ -45,14 +45,13 @@ pub fn is_fence(line: &str) -> Option<(&str, &str, &str)> { pub(crate) fn handle_fence_line( line: &str, writer: &mut ParagraphWriter<'_>, - state: &mut ParagraphState, tracker: &mut FenceTracker, ) -> bool { if !tracker.observe(line) { return false; } - writer.push_verbatim(state, line); + writer.push_verbatim(line); true } diff --git a/src/wrap/inline.rs b/src/wrap/inline.rs index 8b8027fd..d61cfe1e 100644 --- a/src/wrap/inline.rs +++ b/src/wrap/inline.rs @@ -230,10 +230,7 @@ pub(super) fn wrap_preserving_code(text: &str, width: usize) -> Vec { continue; } - let mut split = SplitContext { - lines: &mut lines, - width, - }; + let mut split = SplitContext::new(&mut lines, width); if buffer.split_with_span(&mut split, &tokens, span.clone()) { i = group_end; continue; diff --git a/src/wrap/line_buffer.rs b/src/wrap/line_buffer.rs index 0eafa3b2..7b4ef23d 100644 --- a/src/wrap/line_buffer.rs +++ b/src/wrap/line_buffer.rs @@ -19,6 +19,10 @@ pub(crate) struct SplitContext<'a> { pub(crate) width: usize, } +impl<'a> SplitContext<'a> { + pub(crate) fn new(lines: &'a mut Vec, width: usize) -> Self { Self { lines, width } } +} + impl LineBuffer { pub(crate) fn new() -> Self { Self::default() } diff --git a/src/wrap/paragraph.rs b/src/wrap/paragraph.rs index 358c946f..7ee0ba64 100644 --- a/src/wrap/paragraph.rs +++ b/src/wrap/paragraph.rs @@ -3,28 +3,31 @@ //! These helpers keep paragraph logic focused on buffer management while //! deferring inline wrapping to `inline::wrap_preserving_code`. -use std::borrow::Cow; - use unicode_width::UnicodeWidthStr; use super::inline::wrap_preserving_code; pub(super) struct PrefixLine<'a> { - pub(super) prefix: Cow<'a, str>, + pub(super) prefix: String, pub(super) rest: &'a str, pub(super) repeat_prefix: bool, } -#[derive(Default)] -pub(super) struct ParagraphState { +pub(super) struct ParagraphWriter<'a> { + out: &'a mut Vec, + width: usize, buf: Vec<(String, bool)>, indent: String, } -impl ParagraphState { - pub(super) fn clear(&mut self) { - self.buf.clear(); - self.indent.clear(); +impl<'a> ParagraphWriter<'a> { + pub(super) fn new(out: &'a mut Vec, width: usize) -> Self { + Self { + out, + width, + buf: Vec::new(), + indent: String::new(), + } } pub(super) fn note_indent(&mut self, line: &str) { @@ -33,87 +36,78 @@ impl ParagraphState { } } - pub(super) fn push(&mut self, text: String, hard_break: bool) { + pub(super) fn push_wrapped(&mut self, text: String, hard_break: bool) { self.buf.push((text, hard_break)); } -} -pub(super) struct ParagraphWriter<'a> { - out: &'a mut Vec, - width: usize, -} - -impl<'a> ParagraphWriter<'a> { - pub(super) fn new(out: &'a mut Vec, width: usize) -> Self { Self { out, width } } - - fn append_wrapped_with_prefix(&mut self, line: &PrefixLine<'_>) { - let prefix = line.prefix.as_ref(); - let prefix_width = UnicodeWidthStr::width(prefix); - let available = self.width.saturating_sub(prefix_width).max(1); - let indent_str: String = prefix.chars().take_while(|c| c.is_whitespace()).collect(); - let indent_width = UnicodeWidthStr::width(indent_str.as_str()); - let wrapped_indent = if line.repeat_prefix { - prefix.to_string() - } else { - format!("{}{}", indent_str, " ".repeat(prefix_width - indent_width)) - }; - - let lines = wrap_preserving_code(line.rest, available); - if lines.is_empty() { - self.out.push(prefix.to_string()); - return; - } - - for (index, wrapped_line) in lines.iter().enumerate() { - if index == 0 { - self.out.push(format!("{prefix}{wrapped_line}")); - } else { - self.out.push(format!("{wrapped_indent}{wrapped_line}")); - } + fn push_wrapped_segment(&mut self, indent: &str, segment: &str) { + let indent_width = UnicodeWidthStr::width(indent); + let available = self.width.saturating_sub(indent_width).max(1); + for line in wrap_preserving_code(segment, available) { + self.out.push(format!("{indent}{line}")); } } - pub(super) fn flush_paragraph(&mut self, state: &mut ParagraphState) { - if state.buf.is_empty() { + pub(super) fn flush_paragraph(&mut self) { + if self.buf.is_empty() { return; } + let indent = std::mem::take(&mut self.indent); + let buf = std::mem::take(&mut self.buf); let mut segment = String::new(); - for (text, hard_break) in &state.buf { + for (text, hard_break) in &buf { if !segment.is_empty() { segment.push(' '); } segment.push_str(text); if *hard_break { - self.push_wrapped_segment(&state.indent, &segment); + self.push_wrapped_segment(&indent, &segment); segment.clear(); } } if !segment.is_empty() { - self.push_wrapped_segment(&state.indent, &segment); + self.push_wrapped_segment(&indent, &segment); } - - state.clear(); } - fn push_wrapped_segment(&mut self, indent: &str, segment: &str) { - for line in wrap_preserving_code(segment, self.width - indent.len()) { - self.out.push(format!("{indent}{line}")); - } + pub(super) fn push_verbatim(&mut self, line: &str) { + self.flush_paragraph(); + self.out.push(line.to_string()); } - pub(super) fn push_verbatim(&mut self, state: &mut ParagraphState, line: &str) { - self.flush_paragraph(state); - self.out.push(line.to_string()); + pub(super) fn push_blank_line(&mut self) { + self.flush_paragraph(); + self.out.push(String::new()); } - pub(super) fn handle_prefix_line( - &mut self, - state: &mut ParagraphState, - prefix_line: &PrefixLine<'_>, - ) { - self.flush_paragraph(state); - self.append_wrapped_with_prefix(prefix_line); + pub(super) fn handle_prefix_line(&mut self, prefix_line: &PrefixLine<'_>) { + self.flush_paragraph(); + + let prefix = prefix_line.prefix.as_str(); + let prefix_width = UnicodeWidthStr::width(prefix); + let available = self.width.saturating_sub(prefix_width).max(1); + let indent_str: String = prefix.chars().take_while(|c| c.is_whitespace()).collect(); + let indent_width = UnicodeWidthStr::width(indent_str.as_str()); + let wrapped_indent = if prefix_line.repeat_prefix { + prefix.to_string() + } else { + format!("{}{}", indent_str, " ".repeat(prefix_width - indent_width)) + }; + + let lines = wrap_preserving_code(prefix_line.rest, available); + if lines.is_empty() { + self.out.push(prefix.to_string()); + return; + } + + for (index, wrapped_line) in lines.iter().enumerate() { + if index == 0 { + self.out.push(format!("{prefix}{wrapped_line}")); + } else { + self.out.push(format!("{wrapped_indent}{wrapped_line}")); + } + } } } diff --git a/src/wrap/tests.rs b/src/wrap/tests.rs index 6e244eae..2fbabaae 100644 --- a/src/wrap/tests.rs +++ b/src/wrap/tests.rs @@ -123,10 +123,7 @@ fn line_buffer_split_preserves_multi_space_lines() { buffer.push_span(&tokens, 0, 2); let mut lines = Vec::new(); - let mut split = SplitContext { - lines: &mut lines, - width: 8, - }; + let mut split = SplitContext::new(&mut lines, 8); assert!(buffer.split_with_span(&mut split, &tokens, 2..4)); assert_eq!(lines, vec!["alpha ".to_string()]); assert_eq!(buffer.text(), "beta "); @@ -143,10 +140,7 @@ fn line_buffer_split_trims_single_trailing_space() { buffer.push_span(&tokens, 0, 2); let mut lines = Vec::new(); - let mut split = SplitContext { - lines: &mut lines, - width: 5, - }; + let mut split = SplitContext::new(&mut lines, 5); assert!(buffer.split_with_span(&mut split, &tokens, 2..3)); assert_eq!(lines, vec!["alpha".to_string()]); assert_eq!(buffer.text(), "beta"); @@ -168,10 +162,7 @@ fn line_buffer_split_tracks_multiple_whitespace_tokens() { buffer.push_span(&tokens, 0, 3); let mut lines = Vec::new(); - let mut split = SplitContext { - lines: &mut lines, - width: 4, - }; + let mut split = SplitContext::new(&mut lines, 4); assert!(buffer.split_with_span(&mut split, &tokens, 3..4)); assert_eq!(lines, vec!["foo ".to_string()]); assert_eq!(buffer.text(), "bar"); @@ -308,6 +299,20 @@ fn wrap_text_preserves_code_spans() { ); } +#[test] +fn wrap_text_normalizes_whitespace_only_lines() { + let input = vec![String::new(), " ".to_string(), "\t\t".to_string()]; + let wrapped = wrap_text(&input, 80); + assert_eq!(wrapped, vec![String::new(), String::new(), String::new()]); +} + +#[test] +fn wrap_text_uses_display_width_for_unicode_indent() { + let input = vec![" a".to_string()]; + let wrapped = wrap_text(&input, 2); + assert_eq!(wrapped, vec![" a".to_string()]); +} + #[test] fn wrap_text_multiple_code_spans() { let input = vec!["combine `foo bar` and `baz qux` in one line".to_string()]; diff --git a/tests/table/convert_html.rs b/tests/table/convert_html.rs index 21f57e24..38099a62 100644 --- a/tests/table/convert_html.rs +++ b/tests/table/convert_html.rs @@ -77,3 +77,19 @@ fn preserves_trailing_spaces_in_cells() { ]; assert_eq!(convert_html_tables(&input), expected); } + +#[test] +fn converts_indented_multiline_html_table() { + let input = lines_vec![ + " ", + " ", + " ", + "
AB
12
", + ]; + let expected = lines_vec![ + " | A | B |", + " | --- | --- |", + " | 1 | 2 |", + ]; + assert_eq!(convert_html_tables(&input), expected); +} From 58f905aee62cb370f15b4a1882de05b4628016ab Mon Sep 17 00:00:00 2001 From: Payton McIntosh Date: Sat, 18 Apr 2026 17:24:01 +0100 Subject: [PATCH 2/6] Tighten review follow-ups for tables and footnotes Keep HTML table depth tracking on a consistent trimmed view, restore borrowed prefix handling for common wrap prefixes, and add regression coverage for whitespace-only paragraph breaks and indented HTML table transitions. Document why numeric footnote candidates are finalized in reverse and add targeted renumbering tests covering existing definition headers, numeric candidates, and malformed candidate lines. --- src/footnotes/renumber.rs | 42 +++++++++++++++++++++++++++++++++++++ src/html.rs | 4 ++-- src/wrap.rs | 8 ++++--- src/wrap/paragraph.rs | 6 ++++-- src/wrap/tests.rs | 10 +++++++++ tests/table/convert_html.rs | 20 ++++++++++++++++++ 6 files changed, 83 insertions(+), 7 deletions(-) diff --git a/src/footnotes/renumber.rs b/src/footnotes/renumber.rs index 98471eed..33455a19 100644 --- a/src/footnotes/renumber.rs +++ b/src/footnotes/renumber.rs @@ -391,6 +391,9 @@ fn collect_scan_updates(lines: &[String], state: &mut DefinitionScanState<'_>) { } fn finalize_numeric_candidates(state: &mut DefinitionScanState<'_>) { + // Drain from the bottom so wrapped continuation lines stay attached to the + // correct definition when numeric candidates are later reordered by their + // assigned footnote numbers. for candidate in state.numeric_candidates.drain(..).rev() { let new_number = assign_new_number(state.mapping, candidate.number, state.next_number); let rewritten_rest = rewrite_tokens(&candidate.rest, state.mapping); @@ -484,3 +487,42 @@ pub(super) fn renumber_footnotes(lines: &mut [String]) { reorder_definition_block(lines, start, end, &definitions); } } + +#[cfg(test)] +mod tests { + use super::{numeric_candidate_from_line, renumber_footnotes}; + + fn strings(lines: &[&str]) -> Vec { + lines.iter().map(|line| (*line).to_string()).collect() + } + + #[test] + fn malformed_numeric_candidate_line_is_ignored() { + assert!(numeric_candidate_from_line("7.", 0).is_none()); + assert!(numeric_candidate_from_line("7:", 0).is_none()); + } + + #[test] + fn renumber_footnotes_rewrites_existing_definition_headers() { + let mut lines = strings(&["Reference.[^7]", "", "[^7]: Existing definition"]); + + renumber_footnotes(&mut lines); + + assert_eq!( + lines, + strings(&["Reference.[^1]", "", "[^1]: Existing definition"]) + ); + } + + #[test] + fn renumber_footnotes_rewrites_numeric_candidates() { + let mut lines = strings(&["Reference.[^7]", "", "7. Legacy footnote"]); + + renumber_footnotes(&mut lines); + + assert_eq!( + lines, + strings(&["Reference.[^1]", "", "[^1]: Legacy footnote"]) + ); + } +} diff --git a/src/html.rs b/src/html.rs index e1493e11..5a13f728 100644 --- a/src/html.rs +++ b/src/html.rs @@ -228,10 +228,10 @@ impl HtmlTableState { let trimmed = line.trim_start(); self.buf.push(line.to_string()); self.depth += TABLE_START_RE.find_iter(trimmed).count(); - if TABLE_END_RE.is_match(line) { + if TABLE_END_RE.is_match(trimmed) { self.depth = self .depth - .saturating_sub(TABLE_END_RE.find_iter(line).count()); + .saturating_sub(TABLE_END_RE.find_iter(trimmed).count()); } if self.depth == 0 { out.extend(table_lines_to_markdown(&self.buf)); diff --git a/src/wrap.rs b/src/wrap.rs index 38cb63cd..7cab0952 100644 --- a/src/wrap.rs +++ b/src/wrap.rs @@ -8,6 +8,8 @@ //! The [`Token`] enum and [`tokenize_markdown`] function are public so callers //! can perform custom token-based processing. +use std::borrow::Cow; + mod block; mod fence; mod inline; @@ -61,7 +63,7 @@ fn prefix_line(line: &str) -> Option> { let prefix = cap.get(1).map(|m| m.as_str())?; let rest = cap.get(2).map(|m| m.as_str())?; return Some(PrefixLine { - prefix: prefix.to_string(), + prefix: Cow::Borrowed(prefix), rest, repeat_prefix: false, }); @@ -72,7 +74,7 @@ fn prefix_line(line: &str) -> Option> { let marker = cap.get(2).map(|m| m.as_str())?; let rest = cap.get(3).map(|m| m.as_str())?; return Some(PrefixLine { - prefix: format!("{prefix}{marker}"), + prefix: Cow::Owned(format!("{prefix}{marker}")), rest, repeat_prefix: false, }); @@ -82,7 +84,7 @@ fn prefix_line(line: &str) -> Option> { let prefix = cap.get(1).map(|m| m.as_str())?; let rest = cap.get(2).map(|m| m.as_str())?; Some(PrefixLine { - prefix: prefix.to_string(), + prefix: Cow::Borrowed(prefix), rest, repeat_prefix: true, }) diff --git a/src/wrap/paragraph.rs b/src/wrap/paragraph.rs index 7ee0ba64..ddcce029 100644 --- a/src/wrap/paragraph.rs +++ b/src/wrap/paragraph.rs @@ -3,12 +3,14 @@ //! These helpers keep paragraph logic focused on buffer management while //! deferring inline wrapping to `inline::wrap_preserving_code`. +use std::borrow::Cow; + use unicode_width::UnicodeWidthStr; use super::inline::wrap_preserving_code; pub(super) struct PrefixLine<'a> { - pub(super) prefix: String, + pub(super) prefix: Cow<'a, str>, pub(super) rest: &'a str, pub(super) repeat_prefix: bool, } @@ -85,7 +87,7 @@ impl<'a> ParagraphWriter<'a> { pub(super) fn handle_prefix_line(&mut self, prefix_line: &PrefixLine<'_>) { self.flush_paragraph(); - let prefix = prefix_line.prefix.as_str(); + let prefix = prefix_line.prefix.as_ref(); let prefix_width = UnicodeWidthStr::width(prefix); let available = self.width.saturating_sub(prefix_width).max(1); let indent_str: String = prefix.chars().take_while(|c| c.is_whitespace()).collect(); diff --git a/src/wrap/tests.rs b/src/wrap/tests.rs index 2fbabaae..63f1cf9b 100644 --- a/src/wrap/tests.rs +++ b/src/wrap/tests.rs @@ -306,6 +306,16 @@ fn wrap_text_normalizes_whitespace_only_lines() { assert_eq!(wrapped, vec![String::new(), String::new(), String::new()]); } +#[test] +fn wrap_text_treats_whitespace_only_lines_as_paragraph_breaks() { + let input = vec!["foo".to_string(), " ".to_string(), "bar".to_string()]; + let wrapped = wrap_text(&input, 80); + assert_eq!( + wrapped, + vec!["foo".to_string(), String::new(), "bar".to_string()] + ); +} + #[test] fn wrap_text_uses_display_width_for_unicode_indent() { let input = vec![" a".to_string()]; diff --git a/tests/table/convert_html.rs b/tests/table/convert_html.rs index 38099a62..7b45072e 100644 --- a/tests/table/convert_html.rs +++ b/tests/table/convert_html.rs @@ -93,3 +93,23 @@ fn converts_indented_multiline_html_table() { ]; assert_eq!(convert_html_tables(&input), expected); } + +#[test] +fn converts_indented_table_without_touching_surrounding_content() { + let input = lines_vec![ + "
before
", + " ", + " ", + " ", + "
AB
12
", + "

after

", + ]; + let expected = lines_vec![ + "
before
", + " | A | B |", + " | --- | --- |", + " | 1 | 2 |", + "

after

", + ]; + assert_eq!(convert_html_tables(&input), expected); +} From 088948a51d3752506922f3d87a052264d4cb6153 Mon Sep 17 00:00:00 2001 From: Payton McIntosh Date: Sat, 18 Apr 2026 17:30:07 +0100 Subject: [PATCH 3/6] Finish review fixes for renumber and wrap tests Replace the silent `write!` header assembly in the footnote renumberer with infallible string construction, and stop parsing numeric candidates through panic-prone capture indexing. Move the `wrap_text` regression coverage out of the oversized `src/wrap/tests.rs` file into the existing dedicated integration test surface, and strengthen the Unicode-indent case so it proves display width rather than byte length. --- src/footnotes/renumber.rs | 12 ++-- src/wrap/tests.rs | 129 -------------------------------------- tests/wrap_unit.rs | 24 +++++++ 3 files changed, 31 insertions(+), 134 deletions(-) diff --git a/src/footnotes/renumber.rs b/src/footnotes/renumber.rs index 33455a19..c50ace3c 100644 --- a/src/footnotes/renumber.rs +++ b/src/footnotes/renumber.rs @@ -1,6 +1,6 @@ //! Sequential renumbering of footnote references and definitions. -use std::{collections::HashMap, fmt::Write, sync::LazyLock}; +use std::{collections::HashMap, sync::LazyLock}; use regex::{Captures, Match, Regex}; @@ -325,7 +325,8 @@ fn definition_line_from_parts( let rewritten_rest = rewrite_tokens(parts.rest, mapping); let mut line = String::with_capacity(parts.prefix.len() + rewritten_rest.len() + 8); line.push_str(parts.prefix); - let _ = write!(&mut line, "[^{new_number}]:"); + let header = format!("[^{new_number}]:"); + line.push_str(&header); line.push_str(&rewritten_rest); DefinitionLine { index, @@ -336,11 +337,11 @@ fn definition_line_from_parts( fn numeric_candidate_from_line(line: &str, index: usize) -> Option { let caps = FOOTNOTE_LINE_RE.captures(line)?; - let number = caps["num"].parse::().ok()?; let indent = caps.name("indent").map_or("", |m| m.as_str()).to_string(); - let rest = caps.name("rest").map_or("", |m| m.as_str()).to_string(); let num_match = caps.name("num")?; let rest_match = caps.name("rest")?; + let number = num_match.as_str().parse::().ok()?; + let rest = rest_match.as_str().to_string(); let whitespace = line[num_match.end() + 1..rest_match.start()].to_string(); Some(NumericCandidate { index, @@ -401,7 +402,8 @@ fn finalize_numeric_candidates(state: &mut DefinitionScanState<'_>) { candidate.indent.len() + candidate.whitespace.len() + rewritten_rest.len() + 8, ); line.push_str(&candidate.indent); - let _ = write!(&mut line, "[^{new_number}]:"); + let header = format!("[^{new_number}]:"); + line.push_str(&header); line.push_str(&candidate.whitespace); line.push_str(&rewritten_rest); state.definitions.push(DefinitionLine { diff --git a/src/wrap/tests.rs b/src/wrap/tests.rs index 63f1cf9b..83f89029 100644 --- a/src/wrap/tests.rs +++ b/src/wrap/tests.rs @@ -247,135 +247,6 @@ fn wrap_preserving_code_preserves_carry_whitespace( assert_eq!(lines.concat(), input); } -#[test] -fn wrap_text_preserves_hyphenated_words() { - let input = vec!["A word that is very-long-word indeed".to_string()]; - let wrapped = wrap_text(&input, 20); - assert_eq!( - wrapped, - vec![ - "A word that is".to_string(), - "very-long-word".to_string(), - "indeed".to_string(), - ] - ); -} - -#[test] -fn wrap_text_does_not_insert_spaces_in_hyphenated_words() { - let input = vec![ - concat!( - "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec tincidunt ", - "elit-sed fermentum congue. Vivamus dictum nulla sed consectetur ", - "volutpat.", - ) - .to_string(), - ]; - let wrapped = wrap_text(&input, 80); - assert_eq!( - wrapped, - vec![ - "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec tincidunt".to_string(), - "elit-sed fermentum congue. Vivamus dictum nulla sed consectetur volutpat.".to_string(), - ] - ); -} - -#[test] -fn wrap_text_preserves_code_spans() { - let input = vec![ - "with their own escaping rules. On Windows, scripts default to `powershell -Command` \ - unless the manifest's `interpreter` field overrides the setting." - .to_string(), - ]; - let wrapped = wrap_text(&input, 60); - assert_eq!( - wrapped, - vec![ - "with their own escaping rules. On Windows, scripts default".to_string(), - "to `powershell -Command` unless the manifest's".to_string(), - "`interpreter` field overrides the setting.".to_string(), - ] - ); -} - -#[test] -fn wrap_text_normalizes_whitespace_only_lines() { - let input = vec![String::new(), " ".to_string(), "\t\t".to_string()]; - let wrapped = wrap_text(&input, 80); - assert_eq!(wrapped, vec![String::new(), String::new(), String::new()]); -} - -#[test] -fn wrap_text_treats_whitespace_only_lines_as_paragraph_breaks() { - let input = vec!["foo".to_string(), " ".to_string(), "bar".to_string()]; - let wrapped = wrap_text(&input, 80); - assert_eq!( - wrapped, - vec!["foo".to_string(), String::new(), "bar".to_string()] - ); -} - -#[test] -fn wrap_text_uses_display_width_for_unicode_indent() { - let input = vec![" a".to_string()]; - let wrapped = wrap_text(&input, 2); - assert_eq!(wrapped, vec![" a".to_string()]); -} - -#[test] -fn wrap_text_multiple_code_spans() { - let input = vec!["combine `foo bar` and `baz qux` in one line".to_string()]; - let wrapped = wrap_text(&input, 25); - assert_eq!( - wrapped, - vec![ - "combine `foo bar` and".to_string(), - "`baz qux` in one line".to_string(), - ] - ); -} - -#[test] -fn wrap_text_nested_backticks() { - let input = vec!["Use `` `code` `` to quote backticks".to_string()]; - let wrapped = wrap_text(&input, 20); - assert_eq!( - wrapped, - vec![ - "Use `` `code` `` to".to_string(), - "quote backticks".to_string() - ], - ); -} - -#[test] -fn wrap_text_unmatched_backticks() { - let input = vec!["This has a `dangling code span.".to_string()]; - let wrapped = wrap_text(&input, 20); - assert_eq!( - wrapped, - vec!["This has a".to_string(), "`dangling code span.".to_string()], - ); -} - -#[test] -fn wrap_text_preserves_links() { - let input = vec![ - "`falcon-pachinko` is an extension library for the".to_string(), - "[Falcon](https://falcon.readthedocs.io) web framework. It adds a structured".to_string(), - "approach to asynchronous WebSocket routing and background worker integration.".to_string(), - ]; - let wrapped = wrap_text(&input, 80); - let joined = wrapped.join("\n"); - assert_eq!(joined.matches("https://").count(), 1); - assert!( - wrapped - .iter() - .any(|l| l.contains("https://falcon.readthedocs.io")) - ); -} - #[rstest] #[case("trail ", 80, &["trail "])] #[case("`code span` ", 12, &["`code span` "])] diff --git a/tests/wrap_unit.rs b/tests/wrap_unit.rs index 7f127cf9..3d5b69b7 100644 --- a/tests/wrap_unit.rs +++ b/tests/wrap_unit.rs @@ -52,6 +52,30 @@ fn wrap_text_preserves_code_spans() { ); } +#[test] +fn wrap_text_normalizes_whitespace_only_lines() { + let input = vec![String::new(), " ".to_string(), "\t\t".to_string()]; + let wrapped = wrap_text(&input, 80); + assert_eq!(wrapped, vec![String::new(), String::new(), String::new()]); +} + +#[test] +fn wrap_text_treats_whitespace_only_lines_as_paragraph_breaks() { + let input = vec!["foo".to_string(), " ".to_string(), "bar".to_string()]; + let wrapped = wrap_text(&input, 80); + assert_eq!( + wrapped, + vec!["foo".to_string(), String::new(), "bar".to_string()] + ); +} + +#[test] +fn wrap_text_uses_display_width_for_unicode_indent() { + let input = vec![" a b".to_string()]; + let wrapped = wrap_text(&input, 4); + assert_eq!(wrapped, vec![" a".to_string(), " b".to_string()]); +} + #[test] fn wrap_text_multiple_code_spans() { let input = vec!["combine `foo bar` and `baz qux` in one line".to_string()]; From 6536e7a430a06a2caf076f0b7e7748b249cefa82 Mon Sep 17 00:00:00 2001 From: Payton McIntosh Date: Sat, 18 Apr 2026 18:26:32 +0100 Subject: [PATCH 4/6] Split renumber helpers and parameterize table tests Move the footnote renumber unit tests into a dedicated submodule file and extract the dense definition-scanning and reordering helpers into a separate implementation module. This brings `renumber.rs` back under the repository file-size limit without changing the top-level renumbering flow. Collapse the duplicated indented HTML table regression tests into a single `rstest` so the behaviour stays covered with less repetition. --- src/footnotes/renumber.rs | 345 +------------------------- src/footnotes/renumber/definitions.rs | 313 +++++++++++++++++++++++ src/footnotes/renumber/tests.rs | 42 ++++ tests/table/convert_html.rs | 68 ++--- 4 files changed, 402 insertions(+), 366 deletions(-) create mode 100644 src/footnotes/renumber/definitions.rs create mode 100644 src/footnotes/renumber/tests.rs diff --git a/src/footnotes/renumber.rs b/src/footnotes/renumber.rs index c50ace3c..91d6ff5f 100644 --- a/src/footnotes/renumber.rs +++ b/src/footnotes/renumber.rs @@ -4,6 +4,17 @@ use std::{collections::HashMap, sync::LazyLock}; use regex::{Captures, Match, Regex}; +mod definitions; + +#[cfg(test)] +use definitions::numeric_candidate_from_line; +use definitions::{ + DefinitionUpdates, + collect_definition_updates, + reorder_definition_block, + rewrite_definition_headers, +}; + use super::{ lists::{footnote_block_range, has_existing_footnote_block, trimmed_range}, parsing::{FOOTNOTE_LINE_RE, is_definition_continuation, parse_definition}, @@ -129,21 +140,6 @@ fn collect_reference_mapping_from_text( } } -#[derive(Clone)] -struct DefinitionLine { - index: usize, - new_number: usize, - line: String, -} - -struct NumericCandidate { - index: usize, - number: usize, - indent: String, - whitespace: String, - rest: String, -} - fn footnote_definition_block_range(lines: &[String]) -> Option<(usize, usize)> { let (mut start, end) = trimmed_range(lines, |line| { line.trim().is_empty() @@ -167,281 +163,6 @@ fn footnote_definition_block_range(lines: &[String]) -> Option<(usize, usize)> { } } -fn definition_segment_end(lines: &[String], start: usize, block_end: usize) -> usize { - let mut idx = start + 1; - while idx < block_end { - let line = &lines[idx]; - if parse_definition(line).is_some() { - break; - } - if is_definition_continuation(line) { - idx += 1; - continue; - } - if line.trim().is_empty() { - if idx + 1 < block_end && parse_definition(&lines[idx + 1]).is_some() { - break; - } - idx += 1; - continue; - } - break; - } - idx -} - -fn reorder_definition_block( - lines: &mut [String], - start: usize, - end: usize, - definitions: &[DefinitionLine], -) { - let header_positions: Vec = (start..end) - .filter(|&idx| parse_definition(&lines[idx]).is_some()) - .collect(); - if header_positions.len() <= 1 { - return; - } - - let def_lookup: HashMap = definitions - .iter() - .filter(|definition| (start..end).contains(&definition.index)) - .map(|definition| (definition.index, definition)) - .collect(); - if def_lookup.len() <= 1 { - return; - } - - let prefix_len = header_positions.first().map_or(0, |first| first - start); - let mut segments: Vec<(usize, usize, Vec)> = Vec::new(); - let mut consumed = start + prefix_len; - for &position in &header_positions { - let mut leading_start = position; - while leading_start > consumed - && lines[leading_start - 1].trim().is_empty() - && !is_definition_continuation(&lines[leading_start - 1]) - { - leading_start -= 1; - } - let next_bound = definition_segment_end(lines, position, end); - if let Some(definition) = def_lookup.get(&position) { - debug_assert!( - position >= leading_start, - "definition header {position} cannot precede leading segment start {leading_start}", - ); - let mut segment = Vec::with_capacity(next_bound.saturating_sub(leading_start).max(1)); - segment.extend(lines[leading_start..position].iter().cloned()); - segment.push(definition.line.clone()); - let tail_start = position.saturating_add(1); - if tail_start < next_bound { - segment.extend(lines[tail_start..next_bound].iter().cloned()); - } - segments.push((definition.new_number, definition.index, segment)); - } - consumed = next_bound; - } - - if segments.len() <= 1 { - return; - } - - segments.sort_by(|left, right| left.0.cmp(&right.0).then(left.1.cmp(&right.1))); - - let mut first_leading = Vec::new(); - if let Some((_, _, first_segment)) = segments.first_mut() { - while first_segment - .first() - .is_some_and(|line| line.trim().is_empty() && !is_definition_continuation(line)) - { - first_leading.push(first_segment.remove(0)); - } - } - - let mut reordered = Vec::with_capacity(end - start); - if prefix_len > 0 { - reordered.extend(lines[start..start + prefix_len].iter().cloned()); - } - - for (idx, (_, _, segment)) in segments.into_iter().enumerate() { - reordered.extend(segment); - if idx == 0 && !first_leading.is_empty() { - reordered.append(&mut first_leading); - } - } - - if reordered.len() == end - start { - lines[start..end].clone_from_slice(&reordered); - } -} - -struct DefinitionUpdates { - definitions: Vec, - is_definition_line: Vec, -} - -struct DefinitionScanState<'a> { - mapping: &'a mut HashMap, - next_number: &'a mut usize, - numeric_list_range: Option<(usize, usize)>, - skip_numeric_conversion: bool, - definitions: Vec, - is_definition_line: Vec, - numeric_candidates: Vec, -} - -fn assign_new_number( - mapping: &mut HashMap, - number: usize, - next_number: &mut usize, -) -> usize { - if let Some(&mapped) = mapping.get(&number) { - mapped - } else { - let assigned = *next_number; - *next_number += 1; - mapping.insert(number, assigned); - assigned - } -} - -fn should_convert_numeric_line( - index: usize, - numeric_range: Option<(usize, usize)>, - skip_numeric_conversion: bool, -) -> bool { - if skip_numeric_conversion { - return false; - } - numeric_range.is_some_and(|(start, end)| index >= start && index < end) -} - -fn definition_line_from_parts( - index: usize, - parts: super::parsing::DefinitionParts<'_>, - mapping: &mut HashMap, - next_number: &mut usize, -) -> DefinitionLine { - let new_number = assign_new_number(mapping, parts.number, next_number); - let rewritten_rest = rewrite_tokens(parts.rest, mapping); - let mut line = String::with_capacity(parts.prefix.len() + rewritten_rest.len() + 8); - line.push_str(parts.prefix); - let header = format!("[^{new_number}]:"); - line.push_str(&header); - line.push_str(&rewritten_rest); - DefinitionLine { - index, - new_number, - line, - } -} - -fn numeric_candidate_from_line(line: &str, index: usize) -> Option { - let caps = FOOTNOTE_LINE_RE.captures(line)?; - let indent = caps.name("indent").map_or("", |m| m.as_str()).to_string(); - let num_match = caps.name("num")?; - let rest_match = caps.name("rest")?; - let number = num_match.as_str().parse::().ok()?; - let rest = rest_match.as_str().to_string(); - let whitespace = line[num_match.end() + 1..rest_match.start()].to_string(); - Some(NumericCandidate { - index, - number, - indent, - whitespace, - rest, - }) -} - -fn collect_scan_updates(lines: &[String], state: &mut DefinitionScanState<'_>) { - let mut in_fence = false; - - for (index, line) in lines.iter().enumerate() { - if is_fence_line(line) { - in_fence = !in_fence; - continue; - } - if in_fence { - continue; - } - - if let Some(parts) = parse_definition(line) { - state.definitions.push(definition_line_from_parts( - index, - parts, - state.mapping, - state.next_number, - )); - state.is_definition_line[index] = true; - continue; - } - - if !should_convert_numeric_line( - index, - state.numeric_list_range, - state.skip_numeric_conversion, - ) { - continue; - } - if state.mapping.is_empty() && state.definitions.is_empty() { - continue; - } - if let Some(candidate) = numeric_candidate_from_line(line, index) { - state.numeric_candidates.push(candidate); - } - } -} - -fn finalize_numeric_candidates(state: &mut DefinitionScanState<'_>) { - // Drain from the bottom so wrapped continuation lines stay attached to the - // correct definition when numeric candidates are later reordered by their - // assigned footnote numbers. - for candidate in state.numeric_candidates.drain(..).rev() { - let new_number = assign_new_number(state.mapping, candidate.number, state.next_number); - let rewritten_rest = rewrite_tokens(&candidate.rest, state.mapping); - let mut line = String::with_capacity( - candidate.indent.len() + candidate.whitespace.len() + rewritten_rest.len() + 8, - ); - line.push_str(&candidate.indent); - let header = format!("[^{new_number}]:"); - line.push_str(&header); - line.push_str(&candidate.whitespace); - line.push_str(&rewritten_rest); - state.definitions.push(DefinitionLine { - index: candidate.index, - new_number, - line, - }); - state.is_definition_line[candidate.index] = true; - } -} - -fn collect_definition_updates( - lines: &[String], - mapping: &mut HashMap, -) -> DefinitionUpdates { - let mut next_number = mapping.values().copied().max().unwrap_or(0) + 1; - let numeric_list_range = footnote_block_range(lines); - let skip_numeric_conversion = numeric_list_range - .as_ref() - .is_some_and(|(start, _)| has_existing_footnote_block(lines, *start)); - let mut state = DefinitionScanState { - mapping, - next_number: &mut next_number, - numeric_list_range, - skip_numeric_conversion, - definitions: Vec::new(), - is_definition_line: vec![false; lines.len()], - numeric_candidates: Vec::new(), - }; - collect_scan_updates(lines, &mut state); - finalize_numeric_candidates(&mut state); - - DefinitionUpdates { - definitions: state.definitions, - is_definition_line: state.is_definition_line, - } -} - fn apply_mapping_to_lines( lines: &mut [String], mapping: &HashMap, @@ -460,12 +181,6 @@ fn apply_mapping_to_lines( } } -fn rewrite_definition_headers(lines: &mut [String], definitions: &[DefinitionLine]) { - for definition in definitions { - lines[definition.index].clone_from(&definition.line); - } -} - pub(super) fn renumber_footnotes(lines: &mut [String]) { let mut mapping = collect_reference_mapping(lines); let DefinitionUpdates { @@ -491,40 +206,4 @@ pub(super) fn renumber_footnotes(lines: &mut [String]) { } #[cfg(test)] -mod tests { - use super::{numeric_candidate_from_line, renumber_footnotes}; - - fn strings(lines: &[&str]) -> Vec { - lines.iter().map(|line| (*line).to_string()).collect() - } - - #[test] - fn malformed_numeric_candidate_line_is_ignored() { - assert!(numeric_candidate_from_line("7.", 0).is_none()); - assert!(numeric_candidate_from_line("7:", 0).is_none()); - } - - #[test] - fn renumber_footnotes_rewrites_existing_definition_headers() { - let mut lines = strings(&["Reference.[^7]", "", "[^7]: Existing definition"]); - - renumber_footnotes(&mut lines); - - assert_eq!( - lines, - strings(&["Reference.[^1]", "", "[^1]: Existing definition"]) - ); - } - - #[test] - fn renumber_footnotes_rewrites_numeric_candidates() { - let mut lines = strings(&["Reference.[^7]", "", "7. Legacy footnote"]); - - renumber_footnotes(&mut lines); - - assert_eq!( - lines, - strings(&["Reference.[^1]", "", "[^1]: Legacy footnote"]) - ); - } -} +mod tests; diff --git a/src/footnotes/renumber/definitions.rs b/src/footnotes/renumber/definitions.rs new file mode 100644 index 00000000..fa429501 --- /dev/null +++ b/src/footnotes/renumber/definitions.rs @@ -0,0 +1,313 @@ +//! Definition scanning and reordering helpers for footnote renumbering. +//! +//! The parent module keeps the top-level rewrite flow, while this submodule +//! owns the detail-heavy definition parsing and reordering machinery so each +//! source file stays readable and within the repository size limit. + +use std::collections::HashMap; + +use super::{ + FOOTNOTE_LINE_RE, + footnote_block_range, + has_existing_footnote_block, + is_definition_continuation, + is_fence_line, + parse_definition, + rewrite_tokens, +}; + +#[derive(Clone)] +pub(super) struct DefinitionLine { + pub(super) index: usize, + pub(super) new_number: usize, + pub(super) line: String, +} + +pub(super) struct NumericCandidate { + index: usize, + number: usize, + indent: String, + whitespace: String, + rest: String, +} + +pub(super) struct DefinitionUpdates { + pub(super) definitions: Vec, + pub(super) is_definition_line: Vec, +} + +struct DefinitionScanState<'a> { + mapping: &'a mut HashMap, + next_number: &'a mut usize, + numeric_list_range: Option<(usize, usize)>, + skip_numeric_conversion: bool, + definitions: Vec, + is_definition_line: Vec, + numeric_candidates: Vec, +} + +fn definition_segment_end(lines: &[String], start: usize, block_end: usize) -> usize { + let mut idx = start + 1; + while idx < block_end { + let line = &lines[idx]; + if parse_definition(line).is_some() { + break; + } + if is_definition_continuation(line) { + idx += 1; + continue; + } + if line.trim().is_empty() { + if idx + 1 < block_end && parse_definition(&lines[idx + 1]).is_some() { + break; + } + idx += 1; + continue; + } + break; + } + idx +} + +fn assign_new_number( + mapping: &mut HashMap, + number: usize, + next_number: &mut usize, +) -> usize { + if let Some(&mapped) = mapping.get(&number) { + mapped + } else { + let assigned = *next_number; + *next_number += 1; + mapping.insert(number, assigned); + assigned + } +} + +fn should_convert_numeric_line( + index: usize, + numeric_range: Option<(usize, usize)>, + skip_numeric_conversion: bool, +) -> bool { + if skip_numeric_conversion { + return false; + } + numeric_range.is_some_and(|(start, end)| index >= start && index < end) +} + +fn definition_line_from_parts( + index: usize, + parts: super::super::parsing::DefinitionParts<'_>, + mapping: &mut HashMap, + next_number: &mut usize, +) -> DefinitionLine { + let new_number = assign_new_number(mapping, parts.number, next_number); + let rewritten_rest = rewrite_tokens(parts.rest, mapping); + let mut line = String::with_capacity(parts.prefix.len() + rewritten_rest.len() + 8); + line.push_str(parts.prefix); + let header = format!("[^{new_number}]:"); + line.push_str(&header); + line.push_str(&rewritten_rest); + DefinitionLine { + index, + new_number, + line, + } +} + +pub(super) fn numeric_candidate_from_line(line: &str, index: usize) -> Option { + let caps = FOOTNOTE_LINE_RE.captures(line)?; + let indent = caps.name("indent").map_or("", |m| m.as_str()).to_string(); + let num_match = caps.name("num")?; + let rest_match = caps.name("rest")?; + let number = num_match.as_str().parse::().ok()?; + let rest = rest_match.as_str().to_string(); + let whitespace = line[num_match.end() + 1..rest_match.start()].to_string(); + Some(NumericCandidate { + index, + number, + indent, + whitespace, + rest, + }) +} + +fn collect_scan_updates(lines: &[String], state: &mut DefinitionScanState<'_>) { + let mut in_fence = false; + + for (index, line) in lines.iter().enumerate() { + if is_fence_line(line) { + in_fence = !in_fence; + continue; + } + if in_fence { + continue; + } + + if let Some(parts) = parse_definition(line) { + state.definitions.push(definition_line_from_parts( + index, + parts, + state.mapping, + state.next_number, + )); + state.is_definition_line[index] = true; + continue; + } + + if !should_convert_numeric_line( + index, + state.numeric_list_range, + state.skip_numeric_conversion, + ) { + continue; + } + if state.mapping.is_empty() && state.definitions.is_empty() { + continue; + } + if let Some(candidate) = numeric_candidate_from_line(line, index) { + state.numeric_candidates.push(candidate); + } + } +} + +fn finalize_numeric_candidates(state: &mut DefinitionScanState<'_>) { + // Drain from the bottom so wrapped continuation lines stay attached to the + // correct definition when numeric candidates are later reordered by their + // assigned footnote numbers. + for candidate in state.numeric_candidates.drain(..).rev() { + let new_number = assign_new_number(state.mapping, candidate.number, state.next_number); + let rewritten_rest = rewrite_tokens(&candidate.rest, state.mapping); + let mut line = String::with_capacity( + candidate.indent.len() + candidate.whitespace.len() + rewritten_rest.len() + 8, + ); + line.push_str(&candidate.indent); + let header = format!("[^{new_number}]:"); + line.push_str(&header); + line.push_str(&candidate.whitespace); + line.push_str(&rewritten_rest); + state.definitions.push(DefinitionLine { + index: candidate.index, + new_number, + line, + }); + state.is_definition_line[candidate.index] = true; + } +} + +pub(super) fn collect_definition_updates( + lines: &[String], + mapping: &mut HashMap, +) -> DefinitionUpdates { + let mut next_number = mapping.values().copied().max().unwrap_or(0) + 1; + let numeric_list_range = footnote_block_range(lines); + let skip_numeric_conversion = numeric_list_range + .as_ref() + .is_some_and(|(start, _)| has_existing_footnote_block(lines, *start)); + let mut state = DefinitionScanState { + mapping, + next_number: &mut next_number, + numeric_list_range, + skip_numeric_conversion, + definitions: Vec::new(), + is_definition_line: vec![false; lines.len()], + numeric_candidates: Vec::new(), + }; + collect_scan_updates(lines, &mut state); + finalize_numeric_candidates(&mut state); + + DefinitionUpdates { + definitions: state.definitions, + is_definition_line: state.is_definition_line, + } +} + +pub(super) fn rewrite_definition_headers(lines: &mut [String], definitions: &[DefinitionLine]) { + for definition in definitions { + lines[definition.index].clone_from(&definition.line); + } +} + +pub(super) fn reorder_definition_block( + lines: &mut [String], + start: usize, + end: usize, + definitions: &[DefinitionLine], +) { + let header_positions: Vec = (start..end) + .filter(|&idx| parse_definition(&lines[idx]).is_some()) + .collect(); + if header_positions.len() <= 1 { + return; + } + + let def_lookup: HashMap = definitions + .iter() + .filter(|definition| (start..end).contains(&definition.index)) + .map(|definition| (definition.index, definition)) + .collect(); + if def_lookup.len() <= 1 { + return; + } + + let prefix_len = header_positions.first().map_or(0, |first| first - start); + let mut segments: Vec<(usize, usize, Vec)> = Vec::new(); + let mut consumed = start + prefix_len; + for &position in &header_positions { + let mut leading_start = position; + while leading_start > consumed + && lines[leading_start - 1].trim().is_empty() + && !is_definition_continuation(&lines[leading_start - 1]) + { + leading_start -= 1; + } + let next_bound = definition_segment_end(lines, position, end); + if let Some(definition) = def_lookup.get(&position) { + debug_assert!( + position >= leading_start, + "definition header {position} cannot precede leading segment start {leading_start}", + ); + let mut segment = Vec::with_capacity(next_bound.saturating_sub(leading_start).max(1)); + segment.extend(lines[leading_start..position].iter().cloned()); + segment.push(definition.line.clone()); + let tail_start = position.saturating_add(1); + if tail_start < next_bound { + segment.extend(lines[tail_start..next_bound].iter().cloned()); + } + segments.push((definition.new_number, definition.index, segment)); + } + consumed = next_bound; + } + + if segments.len() <= 1 { + return; + } + + segments.sort_by(|left, right| left.0.cmp(&right.0).then(left.1.cmp(&right.1))); + + let mut first_leading = Vec::new(); + if let Some((_, _, first_segment)) = segments.first_mut() { + while first_segment + .first() + .is_some_and(|line| line.trim().is_empty() && !is_definition_continuation(line)) + { + first_leading.push(first_segment.remove(0)); + } + } + + let mut reordered = Vec::with_capacity(end - start); + if prefix_len > 0 { + reordered.extend(lines[start..start + prefix_len].iter().cloned()); + } + + for (idx, (_, _, segment)) in segments.into_iter().enumerate() { + reordered.extend(segment); + if idx == 0 && !first_leading.is_empty() { + reordered.append(&mut first_leading); + } + } + + if reordered.len() == end - start { + lines[start..end].clone_from_slice(&reordered); + } +} diff --git a/src/footnotes/renumber/tests.rs b/src/footnotes/renumber/tests.rs new file mode 100644 index 00000000..577c58c8 --- /dev/null +++ b/src/footnotes/renumber/tests.rs @@ -0,0 +1,42 @@ +//! Unit tests for footnote renumbering helpers. +//! +//! These cases exercise the private numeric-candidate parsing and renumbering +//! helpers directly so the behaviour stays covered without bloating the main +//! module file. + +use rstest::rstest; + +use super::{numeric_candidate_from_line, renumber_footnotes}; + +fn strings(lines: &[&str]) -> Vec { lines.iter().map(|line| (*line).to_string()).collect() } + +#[rstest] +#[case("7.")] +#[case("7:")] +fn malformed_numeric_candidate_line_is_ignored(#[case] line: &str) { + assert!(numeric_candidate_from_line(line, 0).is_none()); +} + +#[test] +fn renumber_footnotes_rewrites_existing_definition_headers() { + let mut lines = strings(&["Reference.[^7]", "", "[^7]: Existing definition"]); + + renumber_footnotes(&mut lines); + + assert_eq!( + lines, + strings(&["Reference.[^1]", "", "[^1]: Existing definition"]) + ); +} + +#[test] +fn renumber_footnotes_rewrites_numeric_candidates() { + let mut lines = strings(&["Reference.[^7]", "", "7. Legacy footnote"]); + + renumber_footnotes(&mut lines); + + assert_eq!( + lines, + strings(&["Reference.[^1]", "", "[^1]: Legacy footnote"]) + ); +} diff --git a/tests/table/convert_html.rs b/tests/table/convert_html.rs index 7b45072e..9b004d0a 100644 --- a/tests/table/convert_html.rs +++ b/tests/table/convert_html.rs @@ -78,38 +78,40 @@ fn preserves_trailing_spaces_in_cells() { assert_eq!(convert_html_tables(&input), expected); } -#[test] -fn converts_indented_multiline_html_table() { - let input = lines_vec![ - " ", - " ", - " ", - "
AB
12
", - ]; - let expected = lines_vec![ - " | A | B |", - " | --- | --- |", - " | 1 | 2 |", - ]; - assert_eq!(convert_html_tables(&input), expected); -} - -#[test] -fn converts_indented_table_without_touching_surrounding_content() { - let input = lines_vec![ - "
before
", - " ", - " ", - " ", - "
AB
12
", - "

after

", - ]; - let expected = lines_vec![ - "
before
", - " | A | B |", - " | --- | --- |", - " | 1 | 2 |", - "

after

", - ]; +#[rstest( + input, + expected, + case::multiline( + lines_vec![ + " ", + " ", + " ", + "
AB
12
", + ], + lines_vec![ + " | A | B |", + " | --- | --- |", + " | 1 | 2 |", + ] + ), + case::surrounding_content( + lines_vec![ + "
before
", + " ", + " ", + " ", + "
AB
12
", + "

after

", + ], + lines_vec![ + "
before
", + " | A | B |", + " | --- | --- |", + " | 1 | 2 |", + "

after

", + ] + ), +)] +fn converts_indented_html_table_cases(input: Vec, expected: Vec) { assert_eq!(convert_html_tables(&input), expected); } From 8c1c7a78ee582990eace2fa854f35d109edcd4f0 Mon Sep 17 00:00:00 2001 From: Payton McIntosh Date: Sat, 18 Apr 2026 18:44:59 +0100 Subject: [PATCH 5/6] Simplify DefinitionParts path in renumber helpers Expose `DefinitionParts` through the immediate `renumber` parent module so the definition-scanning helper module no longer reaches back through a double-super path. This keeps the module boundary clearer without changing the renumbering behaviour. --- src/footnotes/renumber.rs | 3 +++ src/footnotes/renumber/definitions.rs | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/footnotes/renumber.rs b/src/footnotes/renumber.rs index 91d6ff5f..ba2e93e2 100644 --- a/src/footnotes/renumber.rs +++ b/src/footnotes/renumber.rs @@ -5,6 +5,9 @@ use std::{collections::HashMap, sync::LazyLock}; use regex::{Captures, Match, Regex}; mod definitions; +mod parsing { + pub(super) use super::super::parsing::DefinitionParts; +} #[cfg(test)] use definitions::numeric_candidate_from_line; diff --git a/src/footnotes/renumber/definitions.rs b/src/footnotes/renumber/definitions.rs index fa429501..56d88ecb 100644 --- a/src/footnotes/renumber/definitions.rs +++ b/src/footnotes/renumber/definitions.rs @@ -97,7 +97,7 @@ fn should_convert_numeric_line( fn definition_line_from_parts( index: usize, - parts: super::super::parsing::DefinitionParts<'_>, + parts: super::parsing::DefinitionParts<'_>, mapping: &mut HashMap, next_number: &mut usize, ) -> DefinitionLine { From 766c589ed0b4fe9b91734514f31165c09d4de9c8 Mon Sep 17 00:00:00 2001 From: Payton McIntosh Date: Sat, 18 Apr 2026 19:19:21 +0100 Subject: [PATCH 6/6] Avoid panic on missing footnote regex match Handle the impossible-but-fallible `caps.get(0)` path in `rewrite_refs_in_segment` without panicking. If the regex capture is missing, return a safe empty replacement instead of aborting the renumbering pass. --- src/footnotes/renumber.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/footnotes/renumber.rs b/src/footnotes/renumber.rs index ba2e93e2..24acd57d 100644 --- a/src/footnotes/renumber.rs +++ b/src/footnotes/renumber.rs @@ -70,7 +70,9 @@ fn is_fence_line(line: &str) -> bool { fn rewrite_refs_in_segment(text: &str, mapping: &HashMap) -> String { FOOTNOTE_REF_RE .replace_all(text, |caps: &Captures| { - let mat = caps.get(0).expect("regex matched without capture"); + let Some(mat) = caps.get(0) else { + return String::new(); + }; if is_definition_like(text, &mat) { return caps[0].to_string(); }