diff --git a/README.md b/README.md index 883253df..a6b1acdc 100644 --- a/README.md +++ b/README.md @@ -4,8 +4,8 @@ uniform width. It can wrap paragraphs and list items to 80 columns when the `--wrap` option is used. Hyphenated words are treated as single units during wrapping, so `very-long-word` moves to the next line rather than splitting at -the hyphen. The tool ignores fenced code blocks and respects escaped pipes -(`\|`), making it safe for mixed content. +the hyphen. The tool ignores fenced code blocks and respects escaped pipes +(`\|`), making it safe for mixed content. ## Installation @@ -28,10 +28,13 @@ mdtablefix [--wrap] [--renumber] [--breaks] [--in-place] [FILE...] - With file paths provided, the corrected tables are printed to stdout. - Use `--wrap` to also reflow paragraphs and list items to 80 columns. - Use `--renumber` to rewrite ordered lists with sequential numbering. +- Tabs are interpreted as four spaces when counting indentation for + `--renumber`. - Use `--breaks` to normalize thematic breaks to a line of 70 underscores (configurable via the `THEMATIC_BREAK_LEN` constant). - Use `--in-place` to overwrite files. -- If no files are supplied, input is read from stdin and results are written to stdout. +- If no files are supplied, input is read from stdin and results are written + to stdout. ### Example @@ -76,14 +79,14 @@ fn main() -> std::io::Result<()> { ## HTML table support -`mdtablefix` recognises simple `` elements embedded in Markdown. -Before the main table reflow runs these HTML tables are converted to Markdown in -a preprocessing stage handled by `convert_html_tables`. +`mdtablefix` recognises simple `
` elements embedded in Markdown. Before +the main table reflow runs these HTML tables are converted to Markdown in a +preprocessing stage handled by `convert_html_tables`. Only basic tables composed of ``, `
` and `` tags are detected, and attributes or tag casing do not matter. After conversion the regular reflow -logic aligns them alongside Markdown tables. See -[`docs/html-table-support.md`](docs/html-table-support.md) for details. +logic aligns them alongside Markdown tables. See +[`docs/html-table-support.md`](docs/html-table-support.md) for details. ## Testing @@ -92,5 +95,5 @@ is organised using the [`rstest`](https://crates.io/crates/rstest) crate. ## License -This project is licensed under the ISC license. -See the [LICENSE](LICENSE) file for details. +This project is licensed under the ISC license. See the [LICENSE](LICENSE) file +for details. diff --git a/src/lib.rs b/src/lib.rs index 5468c4ea..575fbc08 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -226,6 +226,34 @@ static BULLET_RE: std::sync::LazyLock = static NUMBERED_RE: std::sync::LazyLock = std::sync::LazyLock::new(|| Regex::new(r"^(\s*)([1-9][0-9]*)\.(\s+)(.*)").unwrap()); +/// Parses a line beginning with a numbered list marker. +/// +/// Returns the indentation prefix, separator following the number, and the +/// remainder of the line if `line` matches the numbered list pattern. +#[doc(hidden)] +fn parse_numbered(line: &str) -> Option<(&str, &str, &str)> { + let cap = NUMBERED_RE.captures(line)?; + let indent = cap.get(1)?.as_str(); + let sep = cap.get(3)?.as_str(); + let rest = cap.get(4)?.as_str(); + Some((indent, sep, rest)) +} + +/// Returns the effective indentation length treating tabs as four spaces. 
+#[doc(hidden)] +fn indent_len(indent: &str) -> usize { + indent + .chars() + .fold(0, |acc, ch| acc + if ch == '\t' { 4 } else { 1 }) +} + +#[doc(hidden)] +fn drop_deeper(indent: usize, counters: &mut Vec<(usize, usize)>) { + while counters.last().is_some_and(|(d, _)| *d > indent) { + counters.pop(); + } +} + fn tokenize_markdown(text: &str) -> Vec { let mut tokens = Vec::new(); let chars: Vec = text.chars().collect(); @@ -595,6 +623,27 @@ fn process_stream_inner(lines: &[String], wrap: bool) -> Vec { /// Lines matching `^\s*[1-9][0-9]*\.\s+` are renumbered sequentially within /// their indentation level. Numbering continues across fenced code blocks /// without resetting. +/// +/// # Examples +/// ``` +/// use mdtablefix::renumber_lists; +/// +/// let lines = vec!["1. foo", "4. bar"] +/// .into_iter() +/// .map(str::to_string) +/// .collect::>(); +/// assert_eq!( +/// renumber_lists(&lines), +/// vec!["1. foo", "2. bar"] +/// .into_iter() +/// .map(str::to_string) +/// .collect::>() +/// ); +/// ``` +/// +/// # Panics +/// Numbered lines never trigger a panic: when no counter exists at a line's +/// indentation (including an empty stack), a new sequence starts at 1. 
pub fn renumber_lists(lines: &[String]) -> Vec { let mut out = Vec::with_capacity(lines.len()); let mut counters: Vec<(usize, usize)> = Vec::new(); @@ -612,28 +661,26 @@ pub fn renumber_lists(lines: &[String]) -> Vec { continue; } - if let Some(cap) = NUMBERED_RE.captures(line) { - let indent = cap.get(1).map_or("", |m| m.as_str()); - let indent_len = indent.len(); - while counters.last().is_some_and(|(i, _)| *i > indent_len) { - counters.pop(); - } - if counters.last().is_none_or(|(i, _)| *i < indent_len) { - counters.push((indent_len, 1)); - } - let idx = counters.len() - 1; - let num = counters[idx].1; - counters[idx].1 += 1; - let spaces = cap.get(3).map_or("", |m| m.as_str()); - let rest = cap.get(4).map_or("", |m| m.as_str()); - out.push(format!("{indent}{num}.{spaces}{rest}")); + if let Some((indent_str, sep, rest)) = parse_numbered(line) { + let indent = indent_len(indent_str); + drop_deeper(indent, &mut counters); + let current = match counters.last_mut() { + Some((d, cnt)) if *d == indent => { + *cnt += 1; + *cnt + } + _ => { + counters.push((indent, 1)); + 1 + } + }; + out.push(format!("{indent_str}{current}.{sep}{rest}")); continue; } - let indent_len = line.chars().take_while(|c| c.is_whitespace()).count(); - while counters.last().is_some_and(|(i, _)| *i > indent_len) { - counters.pop(); - } + let indent_part: String = line.chars().take_while(|c| c.is_whitespace()).collect(); + let indent = indent_len(&indent_part); + drop_deeper(indent, &mut counters); out.push(line.clone()); } diff --git a/tests/integration.rs b/tests/integration.rs index c12b6415..cb835caf 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -839,6 +839,21 @@ fn test_renumber_nested_lists() { assert_eq!(renumber_lists(&input), expected); } +#[test] +fn test_renumber_tabs_in_indent() { + let input = vec!["1. first", "\t1. sub first", "\t5. sub second", "2. second"] + .into_iter() + .map(str::to_string) + .collect::>(); + + let expected = vec!["1. first", "\t1. 
sub first", "\t2. sub second", "2. second"] + .into_iter() + .map(str::to_string) + .collect::>(); + + assert_eq!(renumber_lists(&input), expected); +} + #[test] fn test_renumber_mult_paragraph_items() { let input = vec!["1. first", "", " still first paragraph", "", "2. second"]