From 0b086f766416f8d477c2386d1a4c3e6fb38e0d94 Mon Sep 17 00:00:00 2001 From: Leynos Date: Tue, 29 Jul 2025 17:56:37 +0100 Subject: [PATCH 1/4] Document formatting helpers --- AGENTS.md | 33 +++++++++++++++++---------------- README.md | 2 +- src/breaks.rs | 24 ++++++++++++++++++++++++ src/table.rs | 37 +++++++++++++++++++++++++++++++++++++ 4 files changed, 79 insertions(+), 17 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index d6350ace..0a41a4bb 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -9,8 +9,8 @@ - **Clarity over cleverness.** Be concise, but favour explicit over terse or obscure idioms. Prefer code that's easy to follow. - **Use functions and composition.** Avoid repetition by extracting reusable - logic. Prefer generators or comprehensions, and declarative code to imperative - repetition when readable. + logic. Prefer generators or comprehensions, and declarative code to + imperative repetition when readable. - **Small, meaningful functions.** Functions must be small, clear in purpose, single responsibility, and obey command/query segregation. - **Clear commit messages.** Commit messages should be descriptive, explaining @@ -25,12 +25,13 @@ ("-ize" / "-yse" / "-our") spelling and grammar, with the exception of references to external APIs. - **Illustrate with clear examples.** Function documentation must include clear - examples demonstrating the usage and outcome of the function. Test documentation - should omit examples where the example serves only to reiterate the test logic. -- **Keep file size managable.** No single code file may be longer than 400 lines. - Long switch statements or dispatch tables should be broken up by feature and - constituents colocated with targets. Large blocks of test data should be moved - to external data files. + examples demonstrating the usage and outcome of the function. Test + documentation should omit examples where the example serves only to reiterate + the test logic. +- **Keep file size managable.** No single code file may be longer than 400 + lines. Long switch statements or dispatch tables should be broken up by + feature and constituents colocated with targets. Large blocks of test data + should be moved to external data files. ## Documentation Maintenance @@ -42,8 +43,8 @@ relevant file(s) in the `docs/` directory to reflect the latest state. **Ensure the documentation remains accurate and current.** - Documentation must use en-GB-oxendict ("-ize" / "-yse" / "-our") spelling - and grammar. (EXCEPTION: the naming of the "LICENSE" file, which - is to be left unchanged for community consistency.) + and grammar. (EXCEPTION: the naming of the "LICENSE" file, which is to be + left unchanged for community consistency.) ## Change Quality & Committing @@ -153,19 +154,19 @@ project: specified in `Cargo.toml` must use SemVer-compatible caret requirements (e.g., `some-crate = "1.2.3"`). This is Cargo's default and allows for safe, non-breaking updates to minor and patch versions while preventing breaking - changes from new major versions. This approach is critical for ensuring - build stability and reproducibility. + changes from new major versions. This approach is critical for ensuring build + stability and reproducibility. - **Prohibit unstable version specifiers.** The use of wildcard (`*`) or - open-ended inequality (`>=`) version requirements is strictly forbidden - as they introduce unacceptable risk and unpredictability. Tilde requirements + open-ended inequality (`>=`) version requirements is strictly forbidden as + they introduce unacceptable risk and unpredictability. Tilde requirements (`~`) should only be used where a dependency must be locked to patch-level updates for a specific, documented reason. ### Error Handling - **Prefer semantic error enums**. Derive `std::error::Error` (via the - `thiserror` crate) for any condition the caller might inspect, retry, or - map to an HTTP status. + `thiserror` crate) for any condition the caller might inspect, retry, or map + to an HTTP status. - **Use an *opaque* error only at the app boundary**. Use `eyre::Report` for human-readable logs; these should not be exposed in public APIs. - **Never export the opaque type from a library**. Convert to domain enums at diff --git a/README.md b/README.md index a7027606..4934a7c9 100644 --- a/README.md +++ b/README.md @@ -161,7 +161,7 @@ alongside regular Markdown tables. See [HTML table support for more details](docs/architecture.md#html-table-support-in-mdtablefix) -. + . ## Module structure diff --git a/src/breaks.rs b/src/breaks.rs index a0560528..e8139da5 100644 --- a/src/breaks.rs +++ b/src/breaks.rs @@ -16,6 +16,30 @@ pub(crate) static THEMATIC_BREAK_RE: std::sync::LazyLock = std::sync::Laz static THEMATIC_BREAK_LINE: std::sync::LazyLock = std::sync::LazyLock::new(|| "_".repeat(THEMATIC_BREAK_LEN)); +/// Normalize thematic breaks outside fenced code blocks. +/// +/// Consecutive hyphens, asterisks or underscores are replaced with a +/// standardised line of underscores. Fenced code blocks are ignored so +/// that breaks within them remain untouched. +/// +/// # Examples +/// +/// ``` +/// use std::borrow::Cow; +/// +/// use mdtablefix::{THEMATIC_BREAK_LEN, format_breaks}; +/// +/// let lines = vec!["foo".to_string(), "***".to_string(), "bar".to_string()]; +/// let out = format_breaks(&lines); +/// assert_eq!( +/// out, +/// vec![ +/// Cow::Borrowed("foo"), +/// Cow::Owned("_".repeat(THEMATIC_BREAK_LEN)), +/// Cow::Borrowed("bar"), +/// ] +/// ); +/// ``` #[must_use] pub fn format_breaks(lines: &[String]) -> Vec> { let mut out = Vec::with_capacity(lines.len()); diff --git a/src/table.rs b/src/table.rs index 88bcb754..f706eb80 100644 --- a/src/table.rs +++ b/src/table.rs @@ -11,6 +11,24 @@ fn next_is_pipe(chars: &mut std::iter::Peekable>) -> bool { } #[must_use] +/// Split a Markdown table row into individual cell strings. +/// +/// Escaped pipe characters (`\|`) are treated as literals and whitespace +/// inside each cell is trimmed. +/// +/// # Examples +/// +/// ``` +/// use mdtablefix::split_cells; +/// assert_eq!( +/// split_cells("| A | B |"), +/// vec!["A".to_string(), "B".to_string()] +/// ); +/// assert_eq!( +/// split_cells("a | b \\| c | d"), +/// vec!["a".to_string(), "b | c".to_string(), "d".to_string()] +/// ); +/// ``` pub fn split_cells(line: &str) -> Vec { let mut s = line.trim(); if let Some(stripped) = s.strip_prefix('|') { @@ -162,6 +180,25 @@ fn calculate_and_format( crate::reflow::insert_separator(out, sep_cells, &widths, indent) } +/// Reflow a Markdown table so columns align uniformly. +/// +/// Invalid tables are returned unchanged. +/// +/// # Examples +/// +/// ``` +/// use mdtablefix::reflow_table; +/// let lines = vec![ +/// "| A | B | |".to_string(), +/// "| 1 | 2 | | 3 | 4 |".to_string(), +/// ]; +/// let expected = vec![ +/// "| A | B |".to_string(), +/// "| 1 | 2 |".to_string(), +/// "| 3 | 4 |".to_string(), +/// ]; +/// assert_eq!(reflow_table(&lines), expected); +/// ``` #[must_use] pub fn reflow_table(lines: &[String]) -> Vec { if lines.is_empty() { From 2b22c280249f30f9575722d92b556907f3e3296e Mon Sep 17 00:00:00 2001 From: Leynos Date: Wed, 30 Jul 2025 22:08:40 +0100 Subject: [PATCH 2/4] Fix typo and refactor split_cells --- AGENTS.md | 2 +- src/table.rs | 42 +++++++++--------------------------------- 2 files changed, 10 insertions(+), 34 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 0a41a4bb..fb33d43f 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -28,7 +28,7 @@ examples demonstrating the usage and outcome of the function. Test documentation should omit examples where the example serves only to reiterate the test logic. -- **Keep file size managable.** No single code file may be longer than 400 +- **Keep file size manageable.** No single code file may be longer than 400 lines. Long switch statements or dispatch tables should be broken up by feature and constituents colocated with targets. Large blocks of test data should be moved to external data files. diff --git a/src/table.rs b/src/table.rs index f706eb80..7e0fc700 100644 --- a/src/table.rs +++ b/src/table.rs @@ -6,9 +6,8 @@ use regex::Regex; -fn next_is_pipe(chars: &mut std::iter::Peekable>) -> bool { - chars.peek() == Some(&'|') -} +static ESCAPED_PIPE_RE: std::sync::LazyLock = + std::sync::LazyLock::new(|| Regex::new(r"\\\|").unwrap()); #[must_use] /// Split a Markdown table row into individual cell strings. @@ -30,36 +29,13 @@ fn next_is_pipe(chars: &mut std::iter::Peekable>) -> bool { /// ); /// ``` pub fn split_cells(line: &str) -> Vec { - let mut s = line.trim(); - if let Some(stripped) = s.strip_prefix('|') { - s = stripped; - } - if let Some(stripped) = s.strip_suffix('|') { - s = stripped; - } - - let mut cells = Vec::new(); - let mut current = String::new(); - let mut chars = s.chars().peekable(); - while let Some(ch) = chars.next() { - if ch == '\\' { - if next_is_pipe(&mut chars) { - chars.next(); - current.push('|'); - continue; - } - current.push(ch); - continue; - } - if ch == '|' { - cells.push(current.trim().to_string()); - current.clear(); - } else { - current.push(ch); - } - } - cells.push(current.trim().to_string()); - cells + let trimmed = line.trim().trim_start_matches('|').trim_end_matches('|'); + let placeholder = '\u{1f}'; + let replaced = ESCAPED_PIPE_RE.replace_all(trimmed, &placeholder.to_string()); + replaced + .split('|') + .map(|cell| cell.trim().replace(placeholder, "|")) + .collect() } pub(crate) fn format_separator_cells(widths: &[usize], sep_cells: &[String]) -> Vec { From 761f81d3d2917b52266cd6fd6a541f03623ec18a Mon Sep 17 00:00:00 2001 From: Leynos Date: Thu, 31 Jul 2025 09:38:56 +0100 Subject: [PATCH 3/4] Fix punctuation and grammar --- AGENTS.md | 8 ++++---- README.md | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index fb33d43f..04462339 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -157,10 +157,10 @@ project: changes from new major versions. This approach is critical for ensuring build stability and reproducibility. - **Prohibit unstable version specifiers.** The use of wildcard (`*`) or - open-ended inequality (`>=`) version requirements is strictly forbidden as - they introduce unacceptable risk and unpredictability. Tilde requirements - (`~`) should only be used where a dependency must be locked to patch-level - updates for a specific, documented reason. + open-ended inequality (`>=`) version requirements is strictly forbidden, as + they introduce unacceptable risk and unpredictability. Tilde requirements + (`~`) should only be used where a dependency must be locked to patch-level + updates for a specific, documented reason. ### Error Handling diff --git a/README.md b/README.md index 4934a7c9..a7027606 100644 --- a/README.md +++ b/README.md @@ -161,7 +161,7 @@ alongside regular Markdown tables. See [HTML table support for more details](docs/architecture.md#html-table-support-in-mdtablefix) - . +. ## Module structure From 7aa9c110b9af545d24d4d1c298ef34c7a0c3291c Mon Sep 17 00:00:00 2001 From: Leynos Date: Thu, 31 Jul 2025 11:31:43 +0100 Subject: [PATCH 4/4] Fix typos in contribution guidelines --- AGENTS.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 04462339..67f6e6ce 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -30,7 +30,7 @@ the test logic. - **Keep file size manageable.** No single code file may be longer than 400 lines. Long switch statements or dispatch tables should be broken up by - feature and constituents colocated with targets. Large blocks of test data + feature and constituents co-located with targets. Large blocks of test data should be moved to external data files. ## Documentation Maintenance @@ -157,10 +157,10 @@ project: changes from new major versions. This approach is critical for ensuring build stability and reproducibility. - **Prohibit unstable version specifiers.** The use of wildcard (`*`) or - open-ended inequality (`>=`) version requirements is strictly forbidden, as - they introduce unacceptable risk and unpredictability. Tilde requirements - (`~`) should only be used where a dependency must be locked to patch-level - updates for a specific, documented reason. + open-ended inequality (`>=`) version requirements is strictly forbidden, as it + introduces unacceptable risk and unpredictability. Tilde requirements (`~`) + should only be used where a dependency must be locked to patch-level updates + for a specific, documented reason. ### Error Handling