From d9f1703c645624bbe91a0b2f108c7886564744ff Mon Sep 17 00:00:00 2001 From: Leynos Date: Sun, 20 Jul 2025 03:25:24 +0100 Subject: [PATCH 1/3] Add footnote conversion feature --- README.md | 28 ++---- docs/module-relationships.md | 9 +- src/footnotes.rs | 136 ++++++++++++++++++++++++++++++ src/lib.rs | 3 + src/main.rs | 7 +- src/process.rs | 27 ++++-- tests/cli.rs | 18 ++++ tests/data/footnotes_expected.txt | 27 ++++++ tests/data/footnotes_input.txt | 27 ++++++ tests/footnotes.rs | 21 +++++ 10 files changed, 274 insertions(+), 29 deletions(-) create mode 100644 src/footnotes.rs create mode 100644 tests/data/footnotes_expected.txt create mode 100644 tests/data/footnotes_input.txt create mode 100644 tests/footnotes.rs diff --git a/README.md b/README.md index 0b5071d4..9c46ad67 100644 --- a/README.md +++ b/README.md @@ -13,26 +13,20 @@ making it safe to use on Markdown with mixed content. Install via Cargo: -Bash - ```bash cargo install mdtablefix ``` Or clone the repository and build from source: -Bash - ```bash cargo install --path . ``` ## Command-line usage -Bash - ```bash -mdtablefix [--wrap] [--renumber] [--breaks] [--ellipsis] [--in-place] [FILE...] +mdtablefix [--wrap] [--renumber] [--breaks] [--ellipsis] [--footnotes] [--in-place] [FILE...] ``` - When one or more file paths are provided, the corrected tables are printed to @@ -53,6 +47,9 @@ mdtablefix [--wrap] [--renumber] [--breaks] [--ellipsis] [--in-place] [FILE...] character (`…`). Longer runs are processed left-to-right, so any leftover dots are preserved. +- Use `--footnotes` to convert bare numeric references into GitHub-flavoured + footnote links. + - Use `--in-place` to modify files in-place. - If no files are specified, input is read from stdin and output is written to @@ -62,8 +59,6 @@ mdtablefix [--wrap] [--renumber] [--breaks] [--ellipsis] [--in-place] [FILE...] Before: -Markdown - ```markdown |Character|Catchphrase|Pizza count| |---|---|---| |Speedy Cerviche|Here come the Samurai Pizza Cats!|lots| |Guido Anchovy|Slice and dice!|tons| @@ -72,8 +67,6 @@ come the Samurai Pizza Cats!|lots| |Guido Anchovy|Slice and dice!|tons| After running `mdtablefix`: -Markdown - ```markdown | Character | Catchphrase | Pizza count | | --------------- | --------------------------------- | ----------- | @@ -86,8 +79,6 @@ Markdown Before: -Markdown - ```markdown 1. The Big Cheese's evil plans. 4. Jerry Atric's schemes. @@ -102,8 +93,6 @@ A brief intermission for pizza. After running `mdtablefix --renumber`: -Markdown - ```markdown 1. The Big Cheese's evil plans. 2. Jerry Atric's schemes. @@ -121,8 +110,6 @@ A brief intermission for pizza. The crate provides helper functions for embedding the table reflow logic in your own Rust project: -Rust - ```rust use mdtablefix::{process_stream_opts, rewrite}; use std::path::Path; @@ -133,6 +120,7 @@ fn main() -> std::io::Result<()> { &lines, /* wrap = */ true, /* ellipsis = */ true, + /* footnotes = */ false, ); println!("{}", fixed.join("\n")); rewrite(Path::new("table.md"))?; @@ -140,9 +128,9 @@ fn main() -> std::io::Result<()> { } ``` -- `process_stream_opts(lines: &[String], wrap: bool, ellipsis: bool) -> - Vec` rewrites tables in memory, with optional paragraph wrapping and - ellipsis substitution. +- `process_stream_opts(lines, wrap, ellipsis, footnotes) -> Vec` + rewrites tables in memory, with optional paragraph wrapping, ellipsis + substitution, and footnote conversion. - `rewrite(path: &Path) -> std::io::Result<()>` modifies a Markdown file on disk in-place. diff --git a/docs/module-relationships.md b/docs/module-relationships.md index 2443a05d..1137d929 100644 --- a/docs/module-relationships.md +++ b/docs/module-relationships.md @@ -36,6 +36,10 @@ classDiagram <> +replace_ellipsis() } + class footnotes { + <> + +convert_footnotes() + } class process { <> +process_stream() @@ -63,11 +67,12 @@ classDiagram process ..> table : uses reflow_table process ..> wrap : uses wrap_text, is_fence process ..> ellipsis : uses replace_ellipsis + process ..> footnotes : uses convert_footnotes io ..> process : uses process_stream, process_stream_no_wrap ``` The `lib` module re-exports the public API from the other modules. The `ellipsis` module performs text normalisation. The `process` module provides streaming helpers that combine the lower-level functions, including ellipsis -replacement. The `io` module handles filesystem operations, delegating the text -processing to `process`. +replacement and footnote conversion. The `io` module handles filesystem +operations, delegating the text processing to `process`. diff --git a/src/footnotes.rs b/src/footnotes.rs new file mode 100644 index 00000000..9af8dfcd --- /dev/null +++ b/src/footnotes.rs @@ -0,0 +1,136 @@ +//! Footnote normalisation utilities. +//! +//! Converts bare numeric references in text to GitHub-flavoured Markdown +//! footnote links and rewrites the trailing numeric list into a footnote +//! block. + +use regex::Regex; + +use crate::wrap::{Token, tokenize_markdown}; + +static FOOTNOTE_LINE_RE: std::sync::LazyLock = std::sync::LazyLock::new(|| { + Regex::new(r"^(?P\s*)(?P\d+)\.\s+(?P.*)$").unwrap() +}); + +fn convert_inline(text: &str) -> String { + let mut out = String::with_capacity(text.len()); + let chars: Vec = text.chars().collect(); + let mut i = 0; + while i < chars.len() { + let ch = chars[i]; + if matches!(ch, '.' | '!' | '?' | ')' | ';' | ':') + && (i == 0 || !chars[i - 1].is_ascii_digit()) + { + let mut j = i + 1; + while j < chars.len() && chars[j].is_ascii_digit() { + j += 1; + } + if j > i + 1 && (j == chars.len() || chars[j].is_whitespace()) { + out.push(ch); + out.push_str("[^"); + for c in &chars[i + 1..j] { + out.push(*c); + } + out.push(']'); + if j < chars.len() { + out.push(chars[j]); + j += 1; + } + i = j; + continue; + } + } + out.push(ch); + i += 1; + } + out +} + +fn convert_block(lines: &mut [String]) { + let mut end = lines.len(); + while end > 0 && lines[end - 1].trim().is_empty() { + end -= 1; + } + let mut start = end; + while start > 0 { + if FOOTNOTE_LINE_RE.is_match(lines[start - 1].trim_end()) { + start -= 1; + } else { + break; + } + } + if start >= end { + return; + } + if lines[start].trim_start().starts_with("[^") { + return; + } + for line in lines.iter_mut().take(end).skip(start) { + if let Some(cap) = FOOTNOTE_LINE_RE.captures(line.as_str()) { + let indent = cap.name("indent").unwrap().as_str(); + let num = cap.name("num").unwrap().as_str(); + let rest = cap.name("rest").unwrap().as_str(); + *line = format!("{indent}[^{num}] {rest}"); + } + } +} + +/// Convert bare numeric footnote references to Markdown footnote syntax. +#[must_use] +pub fn convert_footnotes(lines: &[String]) -> Vec { + if lines.is_empty() { + return Vec::new(); + } + let joined = lines.join("\n"); + let mut out = String::new(); + for token in tokenize_markdown(&joined) { + match token { + Token::Text(t) => out.push_str(&convert_inline(t)), + Token::Code(c) => { + out.push('`'); + out.push_str(c); + out.push('`'); + } + Token::Fence(f) => out.push_str(f), + Token::Newline => out.push('\n'), + } + } + let mut lines: Vec = out.split('\n').map(str::to_string).collect(); + convert_block(&mut lines); + lines +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn converts_inline_numbers() { + let input = vec!["See the docs.2".to_string()]; + let expected = vec!["See the docs.[^2]".to_string()]; + assert_eq!(convert_footnotes(&input), expected); + } + + #[test] + fn converts_final_list() { + let input = vec![ + "Text.".to_string(), + String::new(), + " 1. First".to_string(), + " 2. Second".to_string(), + ]; + let expected = vec![ + "Text.".to_string(), + String::new(), + " [^1] First".to_string(), + " [^2] Second".to_string(), + ]; + assert_eq!(convert_footnotes(&input), expected); + } + + #[test] + fn idempotent_on_existing_block() { + let input = vec![" [^1] First".to_string()]; + assert_eq!(convert_footnotes(&input), input); + } +} diff --git a/src/lib.rs b/src/lib.rs index 115c3767..f0b27bf2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,11 +7,13 @@ //! - `lists` for renumbering ordered lists. //! - `breaks` for thematic break formatting. //! - `ellipsis` for normalising textual ellipses. +//! - `footnotes` for converting bare footnote links. //! - `process` for stream processing. //! - `io` for file helpers. pub mod breaks; pub mod ellipsis; +pub mod footnotes; mod html; pub mod io; pub mod lists; @@ -28,6 +30,7 @@ pub fn html_table_to_markdown(lines: &[String]) -> Vec { pub use breaks::{THEMATIC_BREAK_LEN, format_breaks}; pub use ellipsis::replace_ellipsis; +pub use footnotes::convert_footnotes; pub use html::convert_html_tables; pub use io::{rewrite, rewrite_no_wrap}; pub use lists::renumber_lists; diff --git a/src/main.rs b/src/main.rs index b7179f27..7b92885e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -23,7 +23,7 @@ struct Cli { #[derive(clap::Args, Clone, Copy)] #[expect( clippy::struct_excessive_bools, - reason = "CLI exposes four independent flags" + reason = "CLI exposes five independent flags" )] struct FormatOpts { /// Wrap paragraphs and list items to 80 columns @@ -38,10 +38,13 @@ struct FormatOpts { /// Replace "..." with the ellipsis character #[arg(long = "ellipsis")] ellipsis: bool, + /// Convert bare numeric footnotes to Markdown links + #[arg(long = "footnotes")] + footnotes: bool, } fn process_lines(lines: &[String], opts: FormatOpts) -> Vec { - let mut out = process_stream_opts(lines, opts.wrap, opts.ellipsis); + let mut out = process_stream_opts(lines, opts.wrap, opts.ellipsis, opts.footnotes); if opts.renumber { out = renumber_lists(&out); } diff --git a/src/process.rs b/src/process.rs index e0b6a1ba..cbb5c73d 100644 --- a/src/process.rs +++ b/src/process.rs @@ -2,13 +2,19 @@ use crate::{ ellipsis::replace_ellipsis, + footnotes::convert_footnotes, html::convert_html_tables, table::reflow_table, wrap::{self, wrap_text}, }; #[must_use] -pub fn process_stream_inner(lines: &[String], wrap: bool, ellipsis: bool) -> Vec { +pub fn process_stream_inner( + lines: &[String], + wrap: bool, + ellipsis: bool, + footnotes: bool, +) -> Vec { let pre = convert_html_tables(lines); let mut out = Vec::new(); @@ -70,6 +76,10 @@ pub fn process_stream_inner(lines: &[String], wrap: bool, ellipsis: bool) -> Vec } } + if footnotes { + out = convert_footnotes(&out); + } + let mut out = if wrap { wrap_text(&out, 80) } else { out }; if ellipsis { out = replace_ellipsis(&out); @@ -78,16 +88,23 @@ pub fn process_stream_inner(lines: &[String], wrap: bool, ellipsis: bool) -> Vec } #[must_use] -pub fn process_stream(lines: &[String]) -> Vec { process_stream_inner(lines, true, false) } +pub fn process_stream(lines: &[String]) -> Vec { + process_stream_inner(lines, true, false, false) +} #[must_use] pub fn process_stream_no_wrap(lines: &[String]) -> Vec { - process_stream_inner(lines, false, false) + process_stream_inner(lines, false, false, false) } #[must_use] -pub fn process_stream_opts(lines: &[String], wrap: bool, ellipsis: bool) -> Vec { - process_stream_inner(lines, wrap, ellipsis) +pub fn process_stream_opts( + lines: &[String], + wrap: bool, + ellipsis: bool, + footnotes: bool, +) -> Vec { + process_stream_inner(lines, wrap, ellipsis, footnotes) } #[cfg(test)] diff --git a/tests/cli.rs b/tests/cli.rs index 1aafda4f..f0e3cf28 100644 --- a/tests/cli.rs +++ b/tests/cli.rs @@ -133,3 +133,21 @@ fn test_cli_ellipsis_multiple_sequences() { "First… then second… done.\n" ); } + +/// Tests the CLI `--footnotes` option to convert bare footnote links. +#[test] +fn test_cli_footnotes_option() { + let input = include_str!("data/footnotes_input.txt"); + let expected = include_str!("data/footnotes_expected.txt"); + let output = Command::cargo_bin("mdtablefix") + .expect("Failed to create cargo command for mdtablefix") + .arg("--footnotes") + .write_stdin(input) + .output() + .expect("Failed to execute mdtablefix command"); + assert!(output.status.success()); + assert_eq!( + output.stdout, + format!("{}\n", expected.trim_end()).as_bytes() + ); +} diff --git a/tests/data/footnotes_expected.txt b/tests/data/footnotes_expected.txt new file mode 100644 index 00000000..89c0ed32 --- /dev/null +++ b/tests/data/footnotes_expected.txt @@ -0,0 +1,27 @@ +The purpose of a documentation example extends beyond merely demonstrating +syntax. A reader can typically be expected to understand the mechanics of +calling a function or instantiating a struct. A truly valuable example +illustrates *why* and in *what context* an item should be used.[^10] It should +tell a small story or solve a miniature problem that illuminates the item's +purpose. For instance, an example for + +`String::clone()` should not just show `hello.clone();`, but should demonstrate +a scenario where ownership rules necessitate creating a copy.[^10] + +To achieve this, examples must be clear and concise. Any code that is not +directly relevant to the point being made—such as complex setup, boilerplate, +or unrelated logic—should be hidden to avoid distracting the reader.[^3] + +### 2.3 Ergonomic Error Handling: Taming the `?` Operator + +One of the most common ergonomic hurdles in writing doctests involves handling +functions that return a `Result`. The question mark (`?`) operator is the +idiomatic way to propagate errors in Rust, but it presents a challenge for +doctests. The implicit `fn main()` wrapper generated by `rustdoc` has a return +type of `()`, while the `?` operator can only be used in a function that +returns a `Result` or `Option`. This mismatch leads to a compilation error.[^3] + + [^1] Not a footnote + [^2] Rustdoc doctests need fixing - Swatinem, accessed on July 15, 2025, + [^3] Another footnote +[^10] Duplicate example footnote diff --git a/tests/data/footnotes_input.txt b/tests/data/footnotes_input.txt new file mode 100644 index 00000000..cb54f199 --- /dev/null +++ b/tests/data/footnotes_input.txt @@ -0,0 +1,27 @@ +The purpose of a documentation example extends beyond merely demonstrating +syntax. A reader can typically be expected to understand the mechanics of +calling a function or instantiating a struct. A truly valuable example +illustrates *why* and in *what context* an item should be used.10 It should +tell a small story or solve a miniature problem that illuminates the item's +purpose. For instance, an example for + +`String::clone()` should not just show `hello.clone();`, but should demonstrate +a scenario where ownership rules necessitate creating a copy.10 + +To achieve this, examples must be clear and concise. Any code that is not +directly relevant to the point being made—such as complex setup, boilerplate, +or unrelated logic—should be hidden to avoid distracting the reader.3 + +### 2.3 Ergonomic Error Handling: Taming the `?` Operator + +One of the most common ergonomic hurdles in writing doctests involves handling +functions that return a `Result`. The question mark (`?`) operator is the +idiomatic way to propagate errors in Rust, but it presents a challenge for +doctests. The implicit `fn main()` wrapper generated by `rustdoc` has a return +type of `()`, while the `?` operator can only be used in a function that +returns a `Result` or `Option`. This mismatch leads to a compilation error.3 + + 1. Not a footnote + 2. Rustdoc doctests need fixing - Swatinem, accessed on July 15, 2025, + 3. Another footnote +10. Duplicate example footnote diff --git a/tests/footnotes.rs b/tests/footnotes.rs new file mode 100644 index 00000000..83a153a8 --- /dev/null +++ b/tests/footnotes.rs @@ -0,0 +1,21 @@ +//! Integration tests for footnote conversion. + +use mdtablefix::convert_footnotes; + +#[macro_use] +mod prelude; + +#[test] +fn test_convert_bare_footnotes() { + let input: Vec = include_lines!("data/footnotes_input.txt"); + let expected: Vec = include_lines!("data/footnotes_expected.txt"); + let output = convert_footnotes(&input); + assert_eq!(output, expected); +} + +#[test] +fn test_idempotent_on_converted() { + let expected: Vec = include_lines!("data/footnotes_expected.txt"); + let output = convert_footnotes(&expected); + assert_eq!(output, expected); +} From 867312cbcffd2d97fd9a0af1593fb8902f4b234c Mon Sep 17 00:00:00 2001 From: Leynos Date: Sun, 20 Jul 2025 04:11:03 +0100 Subject: [PATCH 2/3] Handle styled footnote punctuation --- src/footnotes.rs | 9 +++++++-- tests/footnotes.rs | 13 +++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/src/footnotes.rs b/src/footnotes.rs index 9af8dfcd..bbdca625 100644 --- a/src/footnotes.rs +++ b/src/footnotes.rs @@ -22,13 +22,18 @@ fn convert_inline(text: &str) -> String { && (i == 0 || !chars[i - 1].is_ascii_digit()) { let mut j = i + 1; + while j < chars.len() && matches!(chars[j], '*' | '_') { + j += 1; + } + let digits_start = j; while j < chars.len() && chars[j].is_ascii_digit() { j += 1; } - if j > i + 1 && (j == chars.len() || chars[j].is_whitespace()) { + if j > digits_start && (j == chars.len() || chars[j].is_whitespace()) { out.push(ch); + out.extend(chars[i + 1..digits_start].iter()); out.push_str("[^"); - for c in &chars[i + 1..j] { + for c in &chars[digits_start..j] { out.push(*c); } out.push(']'); diff --git a/tests/footnotes.rs b/tests/footnotes.rs index 83a153a8..eadd4050 100644 --- a/tests/footnotes.rs +++ b/tests/footnotes.rs @@ -19,3 +19,16 @@ fn test_idempotent_on_converted() { let output = convert_footnotes(&expected); assert_eq!(output, expected); } + +#[test] +fn test_avoids_false_positives() { + let input = lines_vec!("Plan9 is interesting.", "Call 1-800-555-1234 for help.",); + assert_eq!(convert_footnotes(&input), input); +} + +#[test] +fn test_handles_punctuation_inside_bold() { + let input = lines_vec!("It was **scary.**7"); + let expected = lines_vec!("It was **scary.**[^7]"); + assert_eq!(convert_footnotes(&input), expected); +} From a99cecd070b48782a41b63871043fee7fb541432 Mon Sep 17 00:00:00 2001 From: Leynos Date: Sun, 20 Jul 2025 04:35:31 +0100 Subject: [PATCH 3/3] Improve footnote conversion --- Cargo.lock | 1 + Cargo.toml | 1 + README.md | 4 +- src/footnotes.rs | 92 +++++++++++++++------------------------------- src/main.rs | 3 +- src/process.rs | 7 ++-- tests/footnotes.rs | 14 +++++++ 7 files changed, 53 insertions(+), 69 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 510f7e10..d1676e23 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -404,6 +404,7 @@ dependencies = [ "clap", "html5ever", "markup5ever_rcdom", + "once_cell", "regex", "rstest", "tempfile", diff --git a/Cargo.toml b/Cargo.toml index 7c529e07..19afabb3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,7 @@ edition = "2024" anyhow = "1" clap = { version = "4", features = ["derive"] } regex = "1" +once_cell = "1" html5ever = "0.27" markup5ever_rcdom = "0.3" unicode-width = ">=0.1, <0.2" diff --git a/README.md b/README.md index 9c46ad67..68db0695 100644 --- a/README.md +++ b/README.md @@ -47,8 +47,8 @@ mdtablefix [--wrap] [--renumber] [--breaks] [--ellipsis] [--footnotes] [--in-pla character (`…`). Longer runs are processed left-to-right, so any leftover dots are preserved. -- Use `--footnotes` to convert bare numeric references into GitHub-flavoured - footnote links. +- Use `--footnotes` to convert bare numeric references and the final numbered + list into GitHub-flavoured footnote links. - Use `--in-place` to modify files in-place. diff --git a/src/footnotes.rs b/src/footnotes.rs index bbdca625..790ea833 100644 --- a/src/footnotes.rs +++ b/src/footnotes.rs @@ -2,81 +2,49 @@ //! //! Converts bare numeric references in text to GitHub-flavoured Markdown //! footnote links and rewrites the trailing numeric list into a footnote -//! block. +//! block. Only the final contiguous list of footnotes is processed. -use regex::Regex; +use regex::{Captures, Regex}; -use crate::wrap::{Token, tokenize_markdown}; +static INLINE_FN_RE: std::sync::LazyLock = std::sync::LazyLock::new(|| { + Regex::new(r"(?P
^|[^0-9])(?P[.!?);:])(?P