diff --git a/AGENTS.md b/AGENTS.md index 0a41a4bb..d1edc3e8 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -28,9 +28,9 @@ examples demonstrating the usage and outcome of the function. Test documentation should omit examples where the example serves only to reiterate the test logic. -- **Keep file size managable.** No single code file may be longer than 400 +- **Keep file size manageable.** No single code file may be longer than 400 lines. Long switch statements or dispatch tables should be broken up by - feature and constituents colocated with targets. Large blocks of test data + feature and constituents co-located with targets. Large blocks of test data should be moved to external data files. ## Documentation Maintenance @@ -157,10 +157,10 @@ project: changes from new major versions. This approach is critical for ensuring build stability and reproducibility. - **Prohibit unstable version specifiers.** The use of wildcard (`*`) or - open-ended inequality (`>=`) version requirements is strictly forbidden as - they introduce unacceptable risk and unpredictability. Tilde requirements - (`~`) should only be used where a dependency must be locked to patch-level - updates for a specific, documented reason. + open-ended inequality (`>=`) version requirements is strictly forbidden, as it + introduces unacceptable risk and unpredictability. Tilde requirements (`~`) + should only be used where a dependency must be locked to patch-level updates + for a specific, documented reason. ### Error Handling diff --git a/src/breaks.rs b/src/breaks.rs index a0560528..e8139da5 100644 --- a/src/breaks.rs +++ b/src/breaks.rs @@ -16,6 +16,30 @@ pub(crate) static THEMATIC_BREAK_RE: std::sync::LazyLock = std::sync::Laz static THEMATIC_BREAK_LINE: std::sync::LazyLock = std::sync::LazyLock::new(|| "_".repeat(THEMATIC_BREAK_LEN)); +/// Normalize thematic breaks outside fenced code blocks. +/// +/// Consecutive hyphens, asterisks or underscores are replaced with a +/// standardised line of underscores. Fenced code blocks are ignored so +/// that breaks within them remain untouched. +/// +/// # Examples +/// +/// ``` +/// use std::borrow::Cow; +/// +/// use mdtablefix::{THEMATIC_BREAK_LEN, format_breaks}; +/// +/// let lines = vec!["foo".to_string(), "***".to_string(), "bar".to_string()]; +/// let out = format_breaks(&lines); +/// assert_eq!( +/// out, +/// vec![ +/// Cow::Borrowed("foo"), +/// Cow::Owned("_".repeat(THEMATIC_BREAK_LEN)), +/// Cow::Borrowed("bar"), +/// ] +/// ); +/// ``` #[must_use] pub fn format_breaks(lines: &[String]) -> Vec> { let mut out = Vec::with_capacity(lines.len()); diff --git a/src/table.rs b/src/table.rs index 88bcb754..7e0fc700 100644 --- a/src/table.rs +++ b/src/table.rs @@ -6,42 +6,36 @@ use regex::Regex; -fn next_is_pipe(chars: &mut std::iter::Peekable>) -> bool { - chars.peek() == Some(&'|') -} +static ESCAPED_PIPE_RE: std::sync::LazyLock = + std::sync::LazyLock::new(|| Regex::new(r"\\\|").unwrap()); #[must_use] +/// Split a Markdown table row into individual cell strings. +/// +/// Escaped pipe characters (`\|`) are treated as literals and whitespace +/// inside each cell is trimmed. +/// +/// # Examples +/// +/// ``` +/// use mdtablefix::split_cells; +/// assert_eq!( +/// split_cells("| A | B |"), +/// vec!["A".to_string(), "B".to_string()] +/// ); +/// assert_eq!( +/// split_cells("a | b \\| c | d"), +/// vec!["a".to_string(), "b | c".to_string(), "d".to_string()] +/// ); +/// ``` pub fn split_cells(line: &str) -> Vec { - let mut s = line.trim(); - if let Some(stripped) = s.strip_prefix('|') { - s = stripped; - } - if let Some(stripped) = s.strip_suffix('|') { - s = stripped; - } - - let mut cells = Vec::new(); - let mut current = String::new(); - let mut chars = s.chars().peekable(); - while let Some(ch) = chars.next() { - if ch == '\\' { - if next_is_pipe(&mut chars) { - chars.next(); - current.push('|'); - continue; - } - current.push(ch); - continue; - } - if ch == '|' { - cells.push(current.trim().to_string()); - current.clear(); - } else { - current.push(ch); - } - } - cells.push(current.trim().to_string()); - cells + let trimmed = line.trim().trim_start_matches('|').trim_end_matches('|'); + let placeholder = '\u{1f}'; + let replaced = ESCAPED_PIPE_RE.replace_all(trimmed, &placeholder.to_string()); + replaced + .split('|') + .map(|cell| cell.trim().replace(placeholder, "|")) + .collect() } pub(crate) fn format_separator_cells(widths: &[usize], sep_cells: &[String]) -> Vec { @@ -162,6 +156,25 @@ fn calculate_and_format( crate::reflow::insert_separator(out, sep_cells, &widths, indent) } +/// Reflow a Markdown table so columns align uniformly. +/// +/// Invalid tables are returned unchanged. +/// +/// # Examples +/// +/// ``` +/// use mdtablefix::reflow_table; +/// let lines = vec![ +/// "| A | B | |".to_string(), +/// "| 1 | 2 | | 3 | 4 |".to_string(), +/// ]; +/// let expected = vec![ +/// "| A | B |".to_string(), +/// "| 1 | 2 |".to_string(), +/// "| 3 | 4 |".to_string(), +/// ]; +/// assert_eq!(reflow_table(&lines), expected); +/// ``` #[must_use] pub fn reflow_table(lines: &[String]) -> Vec { if lines.is_empty() {