From 378fd2c3e5f6ac5cb6f69e7cc6870ec83d4c83c3 Mon Sep 17 00:00:00 2001 From: Leynos Date: Mon, 14 Jul 2025 20:35:30 +0100 Subject: [PATCH 1/3] Refactor renumbering logic --- src/lib.rs | 52 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 35 insertions(+), 17 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 5468c4ea..674da099 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -226,6 +226,19 @@ static BULLET_RE: std::sync::LazyLock = static NUMBERED_RE: std::sync::LazyLock = std::sync::LazyLock::new(|| Regex::new(r"^(\s*)([1-9][0-9]*)\.(\s+)(.*)").unwrap()); +/// Parses a line beginning with a numbered list marker. +/// +/// Returns the indentation length, separator following the number, and the +/// remainder of the line if `line` matches the numbered list pattern. +#[doc(hidden)] +fn parse_numbered(line: &str) -> Option<(usize, &str, &str)> { + let cap = NUMBERED_RE.captures(line)?; + let indent = cap.get(1)?.as_str().len(); + let sep = cap.get(3)?.as_str(); + let rest = cap.get(4)?.as_str(); + Some((indent, sep, rest)) +} + fn tokenize_markdown(text: &str) -> Vec { let mut tokens = Vec::new(); let chars: Vec = text.chars().collect(); @@ -596,8 +609,11 @@ fn process_stream_inner(lines: &[String], wrap: bool) -> Vec { /// their indentation level. Numbering continues across fenced code blocks /// without resetting. 
pub fn renumber_lists(lines: &[String]) -> Vec { + use std::collections::HashMap; + let mut out = Vec::with_capacity(lines.len()); - let mut counters: Vec<(usize, usize)> = Vec::new(); + let mut stack = Vec::::new(); + let mut counters = HashMap::::new(); let mut in_code = false; for line in lines { @@ -612,27 +628,29 @@ pub fn renumber_lists(lines: &[String]) -> Vec { continue; } - if let Some(cap) = NUMBERED_RE.captures(line) { - let indent = cap.get(1).map_or("", |m| m.as_str()); - let indent_len = indent.len(); - while counters.last().is_some_and(|(i, _)| *i > indent_len) { - counters.pop(); + if let Some((indent, sep, rest)) = parse_numbered(line) { + while stack.last().is_some_and(|&d| d > indent) { + if let Some(d) = stack.pop() { + counters.remove(&d); + } } - if counters.last().is_none_or(|(i, _)| *i < indent_len) { - counters.push((indent_len, 1)); + + if stack.last().is_none_or(|&d| d < indent) { + stack.push(indent); } - let idx = counters.len() - 1; - let num = counters[idx].1; - counters[idx].1 += 1; - let spaces = cap.get(3).map_or("", |m| m.as_str()); - let rest = cap.get(4).map_or("", |m| m.as_str()); - out.push(format!("{indent}{num}.{spaces}{rest}")); + + let num = counters.entry(indent).or_insert(1); + let current = *num; + *num += 1; + out.push(format!("{}{}.{}{}", " ".repeat(indent), current, sep, rest)); continue; } - let indent_len = line.chars().take_while(|c| c.is_whitespace()).count(); - while counters.last().is_some_and(|(i, _)| *i > indent_len) { - counters.pop(); + let indent = line.chars().take_while(|c| c.is_whitespace()).count(); + while stack.last().is_some_and(|&d| d > indent) { + if let Some(d) = stack.pop() { + counters.remove(&d); + } } out.push(line.clone()); } From 19e516837570e1b3fe0aa35788981c4bfddc6b36 Mon Sep 17 00:00:00 2001 From: Leynos Date: Tue, 15 Jul 2025 07:24:10 +0100 Subject: [PATCH 2/3] Simplify list renumbering counters --- src/lib.rs | 56 +++++++++++++++++++++++++++++++++++------------------- 1 file changed, 
36 insertions(+), 20 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 674da099..7c5bed6e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -608,12 +608,30 @@ fn process_stream_inner(lines: &[String], wrap: bool) -> Vec { /// Lines matching `^\s*[1-9][0-9]*\.\s+` are renumbered sequentially within /// their indentation level. Numbering continues across fenced code blocks /// without resetting. +/// +/// # Examples +/// ``` +/// use mdtablefix::renumber_lists; +/// +/// let lines = vec!["1. foo", "4. bar"] +/// .into_iter() +/// .map(str::to_string) +/// .collect::>(); +/// assert_eq!( +/// renumber_lists(&lines), +/// vec!["1. foo", "2. bar"] +/// .into_iter() +/// .map(str::to_string) +/// .collect::>() +/// ); +/// ``` +/// +/// # Panics +/// Panics if the internal counter stack is empty when a numbered line is +/// encountered. This indicates a logic error. pub fn renumber_lists(lines: &[String]) -> Vec { - use std::collections::HashMap; - let mut out = Vec::with_capacity(lines.len()); - let mut stack = Vec::::new(); - let mut counters = HashMap::::new(); + let mut counters: Vec<(usize, usize)> = Vec::new(); let mut in_code = false; for line in lines { @@ -629,28 +647,26 @@ pub fn renumber_lists(lines: &[String]) -> Vec { } if let Some((indent, sep, rest)) = parse_numbered(line) { - while stack.last().is_some_and(|&d| d > indent) { - if let Some(d) = stack.pop() { - counters.remove(&d); - } - } - - if stack.last().is_none_or(|&d| d < indent) { - stack.push(indent); + while counters.last().is_some_and(|(d, _)| *d > indent) { + counters.pop(); } - - let num = counters.entry(indent).or_insert(1); - let current = *num; - *num += 1; + let current = match counters.last_mut() { + Some((d, cnt)) if *d == indent => { + *cnt += 1; + *cnt + } + _ => { + counters.push((indent, 1)); + 1 + } + }; out.push(format!("{}{}.{}{}", " ".repeat(indent), current, sep, rest)); continue; } let indent = line.chars().take_while(|c| c.is_whitespace()).count(); - while 
stack.last().is_some_and(|&d| d > indent) { - if let Some(d) = stack.pop() { - counters.remove(&d); - } + while counters.last().is_some_and(|(d, _)| *d > indent) { + counters.pop(); } out.push(line.clone()); } From adfdd3cfa4940c7e342321308e97e1f454b42b58 Mon Sep 17 00:00:00 2001 From: Leynos Date: Tue, 15 Jul 2025 08:01:45 +0100 Subject: [PATCH 3/3] Simplify list counters and handle tabs --- README.md | 23 +++++++++++++---------- src/lib.rs | 37 +++++++++++++++++++++++++------------ tests/integration.rs | 15 +++++++++++++++ 3 files changed, 53 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index 883253df..a6b1acdc 100644 --- a/README.md +++ b/README.md @@ -4,8 +4,8 @@ uniform width. It can wrap paragraphs and list items to 80 columns when the `--wrap` option is used. Hyphenated words are treated as single units during wrapping, so `very-long-word` moves to the next line rather than splitting at -the hyphen. The tool ignores fenced code blocks and respects escaped pipes -(`\|`), making it safe for mixed content. +the hyphen. The tool ignores fenced code blocks and respects escaped pipes (`\| +`), making it safe for mixed content. ## Installation @@ -28,10 +28,13 @@ mdtablefix [--wrap] [--renumber] [--breaks] [--in-place] [FILE...] - With file paths provided, the corrected tables are printed to stdout. - Use `--wrap` to also reflow paragraphs and list items to 80 columns. - Use `--renumber` to rewrite ordered lists with sequential numbering. +- Tabs are interpreted as four spaces when counting indentation for + `--renumber`. - Use `--breaks` to normalize thematic breaks to a line of 70 underscores (configurable via the `THEMATIC_BREAK_LEN` constant). - Use `--in-place` to overwrite files. -- If no files are supplied, input is read from stdin and results are written to stdout. +- If no files are supplied, input is read from stdin and results are written + to stdout. 
### Example @@ -76,14 +79,14 @@ fn main() -> std::io::Result<()> { ## HTML table support -`mdtablefix` recognises simple `<table>` elements embedded in Markdown. -Before the main table reflow runs these HTML tables are converted to Markdown in -a preprocessing stage handled by `convert_html_tables`. +`mdtablefix` recognises simple `<table>` elements embedded in Markdown. Before +the main table reflow runs these HTML tables are converted to Markdown in a +preprocessing stage handled by `convert_html_tables`. Only basic tables composed of `<tr>`, `<th>` and `<td>` tags are detected, and attributes or tag casing do not matter. After conversion the regular reflow -logic aligns them alongside Markdown tables. See -[`docs/html-table-support.md`](docs/html-table-support.md) for details. +logic aligns them alongside Markdown tables. See +[`docs/html-table-support.md`](docs/html-table-support.md) for details. ## Testing @@ -92,5 +95,5 @@ is organised using the [`rstest`](https://crates.io/crates/rstest) crate. ## License -This project is licensed under the ISC license. -See the [LICENSE](LICENSE) file for details. +This project is licensed under the ISC license. See the [LICENSE](LICENSE) file +for details. diff --git a/src/lib.rs b/src/lib.rs index 7c5bed6e..575fbc08 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -228,17 +228,32 @@ static NUMBERED_RE: std::sync::LazyLock = /// Parses a line beginning with a numbered list marker. /// -/// Returns the indentation length, separator following the number, and the +/// Returns the indentation prefix, separator following the number, and the /// remainder of the line if `line` matches the numbered list pattern. #[doc(hidden)] -fn parse_numbered(line: &str) -> Option<(usize, &str, &str)> { +fn parse_numbered(line: &str) -> Option<(&str, &str, &str)> { let cap = NUMBERED_RE.captures(line)?; - let indent = cap.get(1)?.as_str().len(); + let indent = cap.get(1)?.as_str(); let sep = cap.get(3)?.as_str(); let rest = cap.get(4)?.as_str(); Some((indent, sep, rest)) } +/// Returns the effective indentation length treating tabs as four spaces. 
+#[doc(hidden)] +fn indent_len(indent: &str) -> usize { + indent + .chars() + .fold(0, |acc, ch| acc + if ch == '\t' { 4 } else { 1 }) +} + +#[doc(hidden)] +fn drop_deeper(indent: usize, counters: &mut Vec<(usize, usize)>) { + while counters.last().is_some_and(|(d, _)| *d > indent) { + counters.pop(); + } +} + fn tokenize_markdown(text: &str) -> Vec { let mut tokens = Vec::new(); let chars: Vec = text.chars().collect(); @@ -646,10 +661,9 @@ pub fn renumber_lists(lines: &[String]) -> Vec { continue; } - if let Some((indent, sep, rest)) = parse_numbered(line) { - while counters.last().is_some_and(|(d, _)| *d > indent) { - counters.pop(); - } + if let Some((indent_str, sep, rest)) = parse_numbered(line) { + let indent = indent_len(indent_str); + drop_deeper(indent, &mut counters); let current = match counters.last_mut() { Some((d, cnt)) if *d == indent => { *cnt += 1; @@ -660,14 +674,13 @@ pub fn renumber_lists(lines: &[String]) -> Vec { 1 } }; - out.push(format!("{}{}.{}{}", " ".repeat(indent), current, sep, rest)); + out.push(format!("{indent_str}{current}.{sep}{rest}")); continue; } - let indent = line.chars().take_while(|c| c.is_whitespace()).count(); - while counters.last().is_some_and(|(d, _)| *d > indent) { - counters.pop(); - } + let indent_part: String = line.chars().take_while(|c| c.is_whitespace()).collect(); + let indent = indent_len(&indent_part); + drop_deeper(indent, &mut counters); out.push(line.clone()); } diff --git a/tests/integration.rs b/tests/integration.rs index c12b6415..cb835caf 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -839,6 +839,21 @@ fn test_renumber_nested_lists() { assert_eq!(renumber_lists(&input), expected); } +#[test] +fn test_renumber_tabs_in_indent() { + let input = vec!["1. first", "\t1. sub first", "\t5. sub second", "2. second"] + .into_iter() + .map(str::to_string) + .collect::>(); + + let expected = vec!["1. first", "\t1. sub first", "\t2. sub second", "2. 
second"] + .into_iter() + .map(str::to_string) + .collect::>(); + + assert_eq!(renumber_lists(&input), expected); +} + #[test] fn test_renumber_mult_paragraph_items() { let input = vec!["1. first", "", " still first paragraph", "", "2. second"]