diff --git a/src/lib.rs b/src/lib.rs index bf3d8604..f26a76ad 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -27,14 +27,37 @@ fn split_cells(line: &str) -> Vec { if let Some(stripped) = s.strip_suffix('|') { s = stripped; } - s.split('|').map(|c| c.trim().to_string()).collect() + + let mut cells = Vec::new(); + let mut current = String::new(); + let mut chars = s.chars().peekable(); + while let Some(ch) = chars.next() { + if ch == '\\' { + if let Some(&next) = chars.peek() { + if next == '|' { + current.push('|'); + chars.next(); + continue; + } + } + current.push(ch); + continue; + } + if ch == '|' { + cells.push(current.trim().to_string()); + current.clear(); + } else { + current.push(ch); + } + } + cells.push(current.trim().to_string()); + cells } /// Reflow a broken markdown table. /// /// # Panics /// Panics if the internal regex fails to compile. -#[must_use] /// Reflows a broken markdown table into properly aligned rows and columns. /// /// Takes a slice of strings representing lines of a markdown table, reconstructs the table by splitting and aligning cells, and returns the reflowed table as a vector of strings. If the rows have inconsistent numbers of non-empty columns, the original lines are returned unchanged. @@ -52,10 +75,24 @@ fn split_cells(line: &str) -> Vec { /// "| c | d |".to_string(), /// ]); /// ``` +static SENTINEL_RE: std::sync::LazyLock = + std::sync::LazyLock::new(|| Regex::new(r"\|\s*\|\s*").unwrap()); +static SEP_RE: std::sync::LazyLock = + std::sync::LazyLock::new(|| Regex::new(r"^[\s|:-]+$").unwrap()); + +#[must_use] pub fn reflow_table(lines: &[String]) -> Vec { - let raw = lines.iter().map(|l| l.trim()).collect::>().join(" "); - let sentinel_re = Regex::new(r"\|\s*\|\s*").unwrap(); - let chunks: Vec<&str> = sentinel_re.split(&raw).collect(); + if lines.is_empty() { + return Vec::new(); + } + + let indent: String = lines[0].chars().take_while(|c| c.is_whitespace()).collect(); + let mut trimmed: Vec = lines.iter().map(|l| l.trim().to_string()).collect(); + let sep_idx = trimmed.iter().position(|l| SEP_RE.is_match(l)); + let sep_line = sep_idx.map(|idx| trimmed.remove(idx)); + + let raw = trimmed.join(" "); + let chunks: Vec<&str> = SENTINEL_RE.split(&raw).collect(); let mut cells = Vec::new(); for (idx, chunk) in chunks.iter().enumerate() { let mut ch = (*chunk).to_string(); @@ -94,22 +131,29 @@ pub fn reflow_table(lines: &[String]) -> Vec { return lines.to_vec(); } - rows.into_iter() + let out: Vec = rows + .into_iter() .map(|mut r| { r.retain(|c| !c.is_empty()); while r.len() < max_cols { r.push(String::new()); } - format!("| {} |", r.join(" | ")) + format!("{}| {} |", indent, r.join(" | ")) }) - .collect() + .collect(); + + if let Some(sep) = sep_line { + if let Some(first) = out.first().cloned() { + let mut with_sep = vec![first, format!("{}{}", indent, sep)]; + with_sep.extend(out.into_iter().skip(1)); + return with_sep; + } + return vec![format!("{}{}", indent, sep)]; + } + + out } -/// Process a stream of markdown lines, reflowing tables. -/// -/// # Panics -/// Panics if the regex used for code fences fails to compile. -#[must_use] /// Processes a stream of markdown lines, reflowing tables while preserving code blocks and other content. /// /// Detects fenced code blocks and avoids modifying their contents. Buffers lines that appear to be part of a markdown table and reflows them when the table ends. Non-table lines and code blocks are output unchanged. @@ -139,15 +183,18 @@ pub fn reflow_table(lines: &[String]) -> Vec { /// assert_eq!(output[5], "code block"); /// assert_eq!(output[6], "```"); /// ``` +static FENCE_RE: std::sync::LazyLock = + std::sync::LazyLock::new(|| Regex::new(r"^(```|~~~).*").unwrap()); + +#[must_use] pub fn process_stream(lines: &[String]) -> Vec { - let fence_re = Regex::new(r"^(```|~~~)").unwrap(); let mut out = Vec::new(); let mut buf = Vec::new(); let mut in_code = false; let mut in_table = false; for line in lines { - if fence_re.is_match(line) { + if FENCE_RE.is_match(line) { if !buf.is_empty() { if in_table { out.extend(reflow_table(&buf)); diff --git a/src/main.rs b/src/main.rs index c2db03d2..b9aaf7d1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -8,7 +8,7 @@ use std::path::PathBuf; #[command(about = "Reflow broken markdown tables")] struct Cli { /// Rewrite files in place - #[arg(long = "in-place")] + #[arg(long = "in-place", requires = "files")] in_place: bool, /// Markdown files to fix files: Vec, @@ -37,10 +37,6 @@ struct Cli { fn main() -> anyhow::Result<()> { let cli = Cli::parse(); - if cli.in_place && cli.files.is_empty() { - anyhow::bail!("--in-place requires at least one file"); - } - if cli.files.is_empty() { let mut input = String::new(); io::stdin().read_to_string(&mut input)?; diff --git a/tests/integration.rs b/tests/integration.rs index b35b9baa..cce3d1ad 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -38,6 +38,31 @@ fn malformed_table() -> Vec { vec!["| A | |".to_string(), "| 1 | 2 | 3 |".to_string()] } +#[fixture] +fn header_table() -> Vec { + vec![ + "| A | B | |".to_string(), + "| --- | --- |".to_string(), + "| 1 | 2 | | 3 | 4 |".to_string(), + ] +} + +#[fixture] +fn escaped_pipe_table() -> Vec { + vec![ + "| X | Y | |".to_string(), + "| a \\| b | 1 | | 2 | 3 |".to_string(), + ] +} + +#[fixture] +fn indented_table() -> Vec { + vec![ + " | I | J | |".to_string(), + " | 1 | 2 | | 3 | 4 |".to_string(), + ] +} + #[rstest] /// Tests that `reflow_table` correctly restructures a broken Markdown table into a well-formed table. /// @@ -62,12 +87,30 @@ fn test_reflow_malformed_returns_original(malformed_table: Vec) { } #[rstest] +fn test_reflow_preserves_header(header_table: Vec) { + let expected = vec!["| A | B |", "| --- | --- |", "| 1 | 2 |", "| 3 | 4 |"]; + assert_eq!(reflow_table(&header_table), expected); +} + +#[rstest] +fn test_reflow_handles_escaped_pipes(escaped_pipe_table: Vec) { + let expected = vec!["| X | Y |", "| a | b | 1 |", "| 2 | 3 |"]; + assert_eq!(reflow_table(&escaped_pipe_table), expected); +} + +#[rstest] +fn test_reflow_preserves_indentation(indented_table: Vec) { + let expected = vec![" | I | J |", " | 1 | 2 |", " | 3 | 4 |"]; + assert_eq!(reflow_table(&indented_table), expected); +} + /// Tests that `process_stream` leaves lines inside code fences unchanged. /// /// Verifies that both backtick (```) and tilde (~~~) fenced code blocks are ignored by the table processing logic, ensuring their contents are not altered. +#[rstest] fn test_process_stream_ignores_code_fences() { let lines = vec![ - "```".to_string(), + "```rust".to_string(), "| not | a | table |".to_string(), "```".to_string(), ]; @@ -123,6 +166,8 @@ fn test_cli_process_file(broken_table: Vec) { for line in &broken_table { writeln!(f, "{}", line).unwrap(); } + f.flush().unwrap(); + drop(f); Command::cargo_bin("mdtablefix") .unwrap() .arg(&file_path)