diff --git a/README.md b/README.md
index 254b21e7..883253df 100644
--- a/README.md
+++ b/README.md
@@ -22,12 +22,14 @@ cargo install --path .
 ## Command-line usage
 
 ```bash
-mdtablefix [--wrap] [--renumber] [--in-place] [FILE...]
+mdtablefix [--wrap] [--renumber] [--breaks] [--in-place] [FILE...]
 ```
 
 - With file paths provided, the corrected tables are printed to stdout.
 - Use `--wrap` to also reflow paragraphs and list items to 80 columns.
 - Use `--renumber` to rewrite ordered lists with sequential numbering.
+- Use `--breaks` to normalize thematic breaks to a line of 70 underscores
+  (the width is fixed at build time by the `THEMATIC_BREAK_LEN` constant).
 - Use `--in-place` to overwrite files.
 - If no files are supplied, input is read from stdin and results are written to
   stdout.
diff --git a/src/lib.rs b/src/lib.rs
index f695f9b4..a9ec98b4 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -227,6 +227,15 @@ static BULLET_RE: std::sync::LazyLock<Regex> =
 static NUMBERED_RE: std::sync::LazyLock<Regex> =
     std::sync::LazyLock::new(|| Regex::new(r"^(\s*)([1-9][0-9]*)\.(\s+)(.*)").unwrap());
 
+/// Width, in underscores, of a normalised thematic break.
+pub const THEMATIC_BREAK_LEN: usize = 70;
+
+// The first marker must directly follow the indent of at most three spaces;
+// a leading `[ \t]*` would absorb deeper indentation and match indented code.
+static THEMATIC_BREAK_RE: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| {
+    Regex::new(r"^[ ]{0,3}(?:\*(?:[ \t]*\*){2,}|-(?:[ \t]*-){2,}|_(?:[ \t]*_){2,})[ \t]*$").unwrap()
+});
+
 /// Returns `true` if the line is a fenced code block delimiter (e.g., three backticks or "~~~").
 ///
 /// # Examples
@@ -541,6 +550,42 @@ pub fn renumber_lists(lines: &[String]) -> Vec<String> {
     out
 }
 
+/// Reformat thematic breaks as a line of [`THEMATIC_BREAK_LEN`] underscores.
+///
+/// Thematic breaks are lines composed of three or more matching `-`, `_`, or
+/// `*` characters (optionally separated by spaces or tabs) with up to three
+/// leading spaces. Lines inside fenced code blocks are ignored, as is a run
+/// of dashes directly below a line of text, which Markdown reads as a setext
+/// heading underline rather than a break.
+#[must_use]
+pub fn format_breaks(lines: &[String]) -> Vec<String> {
+    let mut out = Vec::with_capacity(lines.len());
+    let mut in_code = false;
+    // True when the previous line was ordinary text, so that a following run
+    // of dashes would be a setext heading underline.
+    let mut after_text = false;
+
+    for line in lines {
+        if FENCE_RE.is_match(line) {
+            in_code = !in_code;
+            after_text = false;
+            out.push(line.clone());
+            continue;
+        }
+
+        let is_break = !in_code && THEMATIC_BREAK_RE.is_match(line.trim_end());
+        let is_underline = after_text && line.trim().bytes().all(|b| b == b'-');
+        if is_break && !is_underline {
+            out.push("_".repeat(THEMATIC_BREAK_LEN));
+        } else {
+            out.push(line.clone());
+        }
+        after_text = !in_code && !is_break && !line.trim().is_empty();
+    }
+
+    out
+}
+
 #[must_use]
 pub fn process_stream(lines: &[String]) -> Vec<String> { process_stream_inner(lines, true) }
 
diff --git a/src/main.rs b/src/main.rs
index c3422459..43730dee 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -5,7 +5,7 @@ use std::{
 };
 
 use clap::Parser;
-use mdtablefix::{process_stream, process_stream_no_wrap, renumber_lists};
+use mdtablefix::{format_breaks, process_stream, process_stream_no_wrap, renumber_lists};
 
 #[derive(Parser)]
 #[command(about = "Reflow broken markdown tables")]
@@ -13,32 +13,44 @@ struct Cli {
     /// Rewrite files in place
     #[arg(long = "in-place", requires = "files")]
     in_place: bool,
+    #[command(flatten)]
+    opts: FormatOpts,
+    /// Markdown files to fix
+    files: Vec<PathBuf>,
+}
+
+#[derive(clap::Args, Clone, Copy)]
+struct FormatOpts {
     /// Wrap paragraphs and list items to 80 columns
     #[arg(long = "wrap")]
     wrap: bool,
     /// Renumber ordered list items
     #[arg(long = "renumber")]
     renumber: bool,
-    /// Markdown files to fix
-    files: Vec<PathBuf>,
+    /// Reformat thematic breaks as underscores
+    #[arg(long = "breaks")]
+    breaks: bool,
 }
 
-fn process_lines(lines: &[String], wrap: bool, renumber: bool) -> Vec<String> {
-    let mut out = if wrap {
+fn process_lines(lines: &[String], opts: FormatOpts) -> Vec<String> {
+    let mut out = if opts.wrap {
         process_stream(lines)
     } else {
         process_stream_no_wrap(lines)
     };
-    if renumber {
+    if opts.renumber {
         out = renumber_lists(&out);
     }
+    if opts.breaks {
+        out = format_breaks(&out);
+    }
     out
 }
 
-fn rewrite_path(path: &Path, wrap: bool, renumber: bool) -> std::io::Result<()> {
+fn rewrite_path(path: &Path, opts: FormatOpts) -> std::io::Result<()> {
     let content = fs::read_to_string(path)?;
     let lines: Vec<String> = content.lines().map(str::to_string).collect();
-    let fixed = process_lines(&lines, wrap, renumber);
+    let fixed = process_lines(&lines, opts);
     fs::write(path, fixed.join("\n") + "\n")
 }
 
@@ -72,18 +84,18 @@ fn main() -> anyhow::Result<()> {
         let mut input = String::new();
         io::stdin().read_to_string(&mut input)?;
         let lines: Vec<String> = input.lines().map(str::to_string).collect();
-        let fixed = process_lines(&lines, cli.wrap, cli.renumber);
+        let fixed = process_lines(&lines, cli.opts);
         println!("{}", fixed.join("\n"));
         return Ok(());
     }
 
     for path in cli.files {
         if cli.in_place {
-            rewrite_path(&path, cli.wrap, cli.renumber)?;
+            rewrite_path(&path, cli.opts)?;
         } else {
             let content = fs::read_to_string(&path)?;
             let lines: Vec<String> = content.lines().map(str::to_string).collect();
-            let fixed = process_lines(&lines, cli.wrap, cli.renumber);
+            let fixed = process_lines(&lines, cli.opts);
             println!("{}", fixed.join("\n"));
         }
     }
diff --git a/tests/integration.rs b/tests/integration.rs
index c883d41b..bdbea78e 100644
--- a/tests/integration.rs
+++ b/tests/integration.rs
@@ -1,7 +1,14 @@
 use std::{fs::File, io::Write};
 
 use assert_cmd::Command;
-use mdtablefix::{convert_html_tables, process_stream, reflow_table, renumber_lists};
+use mdtablefix::{
+    THEMATIC_BREAK_LEN,
+    convert_html_tables,
+    format_breaks,
+    process_stream,
+    reflow_table,
+    renumber_lists,
+};
 use rstest::{fixture, rstest};
 use tempfile::tempdir;
 
@@ -806,62 +813,86 @@ fn test_renumber_mult_paragraph_items() {
 }
 
 #[test]
-fn test_wrap_hyphenated_word() {
-    let line = format!("{} extremely-very-long-word end", "A".repeat(60));
-    let output = process_stream(&[line]);
-    assert_eq!(
-        output,
-        vec![
-            "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA".to_string(),
-            "extremely-very-long-word end".to_string(),
-        ]
-    );
+fn test_format_breaks_basic() {
+    let input = vec!["foo", "***", "bar"]
+        .into_iter()
+        .map(str::to_string)
+        .collect::<Vec<String>>();
+    let expected = vec![
+        "foo".to_string(),
+        "_".repeat(THEMATIC_BREAK_LEN),
+        "bar".to_string(),
+    ];
+    assert_eq!(format_breaks(&input), expected);
 }
+
 #[test]
-fn test_wrap_multiple_hyphenated_words() {
-    let line = format!("{} foo-bar baz-qux quux-corge end", "A".repeat(60));
-    let output = process_stream(&[line]);
-    assert_eq!(
-        output,
-        vec![
-            "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA foo-bar baz-qux"
-                .to_string(),
-            "quux-corge end".to_string(),
-        ]
-    );
+fn test_format_breaks_ignores_code() {
+    let input = vec!["```", "---", "```"]
+        .into_iter()
+        .map(str::to_string)
+        .collect::<Vec<String>>();
+    assert_eq!(format_breaks(&input), input);
 }
 
 #[test]
-fn test_wrap_hyphenated_word_at_boundary() {
-    let line = format!("{} extremely-very-long-word end", "A".repeat(55));
-    let output = process_stream(&[line]);
-    assert_eq!(
-        output,
-        vec![
-            "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA extremely-very-long-word"
-                .to_string(),
-            "end".to_string(),
-        ]
-    );
+fn test_format_breaks_mixed_chars() {
+    let input = vec!["-*-*-"]
+        .into_iter()
+        .map(str::to_string)
+        .collect::<Vec<String>>();
+    assert_eq!(format_breaks(&input), input);
+}
+
+#[test]
+fn test_format_breaks_ignores_indented_code() {
+    let input = vec!["    ---", "\t***"]
+        .into_iter()
+        .map(str::to_string)
+        .collect::<Vec<String>>();
+    assert_eq!(format_breaks(&input), input);
+}
+
+#[test]
+fn test_format_breaks_keeps_setext_heading() {
+    let input = vec!["Title", "---"]
+        .into_iter()
+        .map(str::to_string)
+        .collect::<Vec<String>>();
+    assert_eq!(format_breaks(&input), input);
+}
+
+#[test]
+fn test_format_breaks_with_spaces_and_indent() {
+    let input = vec![" - - - "]
+        .into_iter()
+        .map(str::to_string)
+        .collect::<Vec<String>>();
+    let expected = vec!["_".repeat(THEMATIC_BREAK_LEN)];
+    assert_eq!(format_breaks(&input), expected);
 }
 
 #[test]
-fn test_wrap_word_longer_than_width() {
-    let long_word = "a".repeat(90);
-    let output = process_stream(&[long_word.clone()]);
-    assert_eq!(output, vec!["a".repeat(80), "a".repeat(10)]);
+fn test_format_breaks_with_tabs_and_underscores() {
+    let input = vec!["_\t_\t_\t"]
+        .into_iter()
+        .map(str::to_string)
+        .collect::<Vec<String>>();
+    let expected = vec!["_".repeat(THEMATIC_BREAK_LEN)];
+    assert_eq!(format_breaks(&input), expected);
 }
 
 #[test]
-fn test_wrap_line_without_hyphenated_words() {
-    let line = format!("{} lorem ipsum dolor sit amet", "A".repeat(60));
-    let output = process_stream(&[line]);
+fn test_cli_breaks_option() {
+    let output = Command::cargo_bin("mdtablefix")
+        .unwrap()
+        .arg("--breaks")
+        .write_stdin("---\n")
+        .output()
+        .unwrap();
+    assert!(output.status.success());
     assert_eq!(
-        output,
-        vec![
-            "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA lorem ipsum dolor"
-                .to_string(),
-            "sit amet".to_string(),
-        ]
+        String::from_utf8_lossy(&output.stdout),
+        format!("{}\n", "_".repeat(THEMATIC_BREAK_LEN))
     );
 }