From b60bb8f16d48ed37e54a1dc3f9fbf42bf0e80929 Mon Sep 17 00:00:00 2001 From: Leynos Date: Mon, 21 Jul 2025 12:41:09 +0100 Subject: [PATCH 1/4] Handle orphaned fence specifiers safely --- src/fences.rs | 25 +++++++++++++++++-------- tests/cli.rs | 19 +++++++++++++++++-- tests/fences.rs | 19 +++++++++++++------ 3 files changed, 47 insertions(+), 16 deletions(-) diff --git a/src/fences.rs b/src/fences.rs index 91454e53..2e6670cf 100644 --- a/src/fences.rs +++ b/src/fences.rs @@ -11,8 +11,7 @@ use regex::Regex; static FENCE_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)(`{3,}|~{3,})([A-Za-z0-9_+.,-]*)\s*$").unwrap()); -static ORPHAN_LANG_RE: LazyLock<Regex> = - LazyLock::new(|| Regex::new(r"^[A-Za-z0-9_+.-]+(?:,[A-Za-z0-9_+.-]+)*$").unwrap()); +static ORPHAN_LANG_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\w+(?:,\w+)*$").unwrap()); /// Compress backtick fences to exactly three backticks. /// @@ -71,24 +70,34 @@ pub fn attach_orphan_specifiers(lines: &[String]) -> Vec<String> { for line in lines { let trimmed = line.trim(); - if trimmed.starts_with("```") { + if let Some(cap) = FENCE_RE.captures(trimmed) { if in_fence { in_fence = false; - } else { + out.push(line.clone()); + continue; + } + + let indent = cap.get(1).map_or("", |m| m.as_str()); + let lang_present = cap.get(3).map_or("", |m| m.as_str()); + + if lang_present.is_empty() { while matches!(out.last(), Some(l) if l.trim().is_empty()) { out.pop(); } if let Some(prev) = out.last() { - let lang = prev.trim().to_string(); - if ORPHAN_LANG_RE.is_match(&lang) { + let lang_owned = prev.trim().to_string(); + if ORPHAN_LANG_RE.is_match(&lang_owned) { out.pop(); - out.push(format!("```{lang}")); + out.push(format!("{indent}```{}", lang_owned.to_lowercase())); in_fence = true; continue; } } - in_fence = true; } + + in_fence = true; + out.push(line.clone()); + continue; } out.push(line.clone()); diff --git a/tests/cli.rs b/tests/cli.rs index d67001b0..3c4622ed 100644 --- a/tests/cli.rs +++ b/tests/cli.rs @@ -193,7 +193,7 @@ fn 
test_cli_fences_orphan_specifier() { assert!(output.status.success()); assert_eq!( String::from_utf8_lossy(&output.stdout), - "```Rust\nfn main() {}\n```\n" + "```rust\nfn main() {}\n```\n" ); } @@ -219,7 +219,22 @@ fn test_cli_fences_with_renumber() { assert!(output.status.success()); assert_eq!( String::from_utf8_lossy(&output.stdout), - "```Rust\nfn main() {}\n```\n\n1. first\n2. second\n", + "```rust\nfn main() {}\n```\n\n1. first\n2. second\n", + ); +} + +#[test] +fn test_cli_fences_preserve_existing_language() { + let output = Command::cargo_bin("mdtablefix") + .expect("Failed to create cargo command for mdtablefix") + .arg("--fences") + .write_stdin("ruby\n```rust\nfn main() {}\n```\n") + .output() + .expect("Failed to execute mdtablefix command"); + assert!(output.status.success()); + assert_eq!( + String::from_utf8_lossy(&output.stdout), + "ruby\n```rust\nfn main() {}\n```\n" ); } diff --git a/tests/fences.rs b/tests/fences.rs index bffe9d16..43e839db 100644 --- a/tests/fences.rs +++ b/tests/fences.rs @@ -47,28 +47,28 @@ fn leaves_other_lines_untouched() { fn fixes_orphaned_specifier() { let input = lines_vec!["Rust", "```", "fn main() {}", "```"]; let out = attach_orphan_specifiers(&compress_fences(&input)); - assert_eq!(out, lines_vec!["```Rust", "fn main() {}", "```"]); + assert_eq!(out, lines_vec!["```rust", "fn main() {}", "```"]); } #[test] fn attaches_orphan_specifier_unit() { let input = lines_vec!["Rust", "```", "fn main() {}", "```"]; let out = attach_orphan_specifiers(&input); - assert_eq!(out, lines_vec!["```Rust", "fn main() {}", "```"]); + assert_eq!(out, lines_vec!["```rust", "fn main() {}", "```"]); } #[test] fn attaches_orphan_specifier_with_blank_line_unit() { let input = lines_vec!["Rust", "", "```", "fn main() {}", "```"]; let out = attach_orphan_specifiers(&input); - assert_eq!(out, lines_vec!["```Rust", "fn main() {}", "```"]); + assert_eq!(out, lines_vec!["```rust", "fn main() {}", "```"]); } #[test] fn 
fixes_orphaned_specifier_with_blank_line() { let input = lines_vec!["Rust", "", "```", "fn main() {}", "```"]; let out = attach_orphan_specifiers(&compress_fences(&input)); - assert_eq!(out, lines_vec!["```Rust", "fn main() {}", "```"]); + assert_eq!(out, lines_vec!["```rust", "fn main() {}", "```"]); } #[test] @@ -87,10 +87,10 @@ fn fixes_multiple_orphaned_specifiers() { assert_eq!( out, lines_vec![ - "```Rust", + "```rust", "fn main() {}", "```", - "```Python", + "```python", "print('hi')", "```" ] @@ -112,3 +112,10 @@ fn does_not_attach_non_orphan_lines_before_fences() { let out = attach_orphan_specifiers(&input); assert_eq!(out, input); } + +#[test] +fn does_not_overwrite_existing_fence() { + let input = lines_vec!["ruby", "```rust", "fn main() {}", "```"]; + let out = attach_orphan_specifiers(&compress_fences(&input)); + assert_eq!(out, lines_vec!["ruby", "```rust", "fn main() {}", "```"]); +} From b6be5ee4ed6bf6eda5b876166451feba68e7f38c Mon Sep 17 00:00:00 2001 From: Leynos Date: Mon, 21 Jul 2025 14:17:51 +0100 Subject: [PATCH 2/4] Extend fence specifier regex --- src/fences.rs | 6 ++++-- tests/cli.rs | 15 +++++++++++++++ tests/fences.rs | 28 ++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 2 deletions(-) diff --git a/src/fences.rs b/src/fences.rs index 2e6670cf..97faea09 100644 --- a/src/fences.rs +++ b/src/fences.rs @@ -11,7 +11,9 @@ use regex::Regex; static FENCE_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)(`{3,}|~{3,})([A-Za-z0-9_+.,-]*)\s*$").unwrap()); -static ORPHAN_LANG_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\w+(?:,\w+)*$").unwrap()); +static ORPHAN_LANG_RE: LazyLock<Regex> = LazyLock::new(|| { + Regex::new(r"^[A-Za-z0-9_+.-]*[A-Za-z0-9_+\-](?:,[A-Za-z0-9_+.-]*[A-Za-z0-9_+\-])*$").unwrap() +}); /// Compress backtick fences to exactly three backticks. 
/// @@ -61,7 +63,7 @@ pub fn compress_fences(lines: &[String]) -> Vec<String> { /// "```".to_string(), /// ]; /// let fixed = attach_orphan_specifiers(&compress_fences(&lines)); -/// assert_eq!(fixed[0], "```Rust"); +/// assert_eq!(fixed[0], "```rust"); /// ``` #[must_use] pub fn attach_orphan_specifiers(lines: &[String]) -> Vec<String> { diff --git a/tests/cli.rs b/tests/cli.rs index 3c4622ed..9bc63548 100644 --- a/tests/cli.rs +++ b/tests/cli.rs @@ -238,6 +238,21 @@ fn test_cli_fences_preserve_existing_language() { ); } +#[test] +fn test_cli_fences_orphan_specifier_symbols() { + let output = Command::cargo_bin("mdtablefix") + .expect("Failed to create cargo command for mdtablefix") + .arg("--fences") + .write_stdin("C++\n```\nfn main() {}\n```\n") + .output() + .expect("Failed to execute mdtablefix command"); + assert!(output.status.success()); + assert_eq!( + String::from_utf8_lossy(&output.stdout), + "```c++\nfn main() {}\n```\n" + ); +} + /// Tests the CLI `--footnotes` option to convert bare footnote links. 
#[test] fn test_cli_footnotes_option() { diff --git a/tests/fences.rs b/tests/fences.rs index 43e839db..c4e22e25 100644 --- a/tests/fences.rs +++ b/tests/fences.rs @@ -119,3 +119,31 @@ fn does_not_overwrite_existing_fence() { let out = attach_orphan_specifiers(&compress_fences(&input)); assert_eq!(out, lines_vec!["ruby", "```rust", "fn main() {}", "```"]); } + +#[test] +fn attaches_orphan_specifier_with_symbols() { + let input = lines_vec!["C++", "```", "fn main() {}", "```"]; + let out = attach_orphan_specifiers(&compress_fences(&input)); + assert_eq!(out, lines_vec!["```c++", "fn main() {}", "```"]); +} + +#[test] +fn attaches_orphan_specifier_with_hyphen_and_dot() { + let input = lines_vec!["objective-c", "```", "int main() {}", "```"]; + let out = attach_orphan_specifiers(&compress_fences(&input)); + assert_eq!(out, lines_vec!["```objective-c", "int main() {}", "```"]); +} + +#[test] +fn does_not_attach_specifier_with_trailing_period() { + let input = lines_vec!["rust.", "```", "fn main() {}", "```"]; + let out = attach_orphan_specifiers(&input); + assert_eq!(out, input); +} + +#[test] +fn does_not_attach_specifier_with_trailing_question_mark() { + let input = lines_vec!["rust?", "```", "fn main() {}", "```"]; + let out = attach_orphan_specifiers(&input); + assert_eq!(out, input); +} From 18d30653cec8f3782fbfe9e152aefcd078feddd6 Mon Sep 17 00:00:00 2001 From: Leynos Date: Mon, 21 Jul 2025 14:17:57 +0100 Subject: [PATCH 3/4] Require blank line before orphan specifier --- src/fences.rs | 17 ++++++++++------- tests/cli.rs | 16 ++++++++++++++++ tests/fences.rs | 12 ++++++++++++ 3 files changed, 38 insertions(+), 7 deletions(-) diff --git a/src/fences.rs b/src/fences.rs index 97faea09..cff4b3be 100644 --- a/src/fences.rs +++ b/src/fences.rs @@ -83,14 +83,17 @@ pub fn attach_orphan_specifiers(lines: &[String]) -> Vec<String> { let lang_present = cap.get(3).map_or("", |m| m.as_str()); if lang_present.is_empty() { - while matches!(out.last(), Some(l) if l.trim().is_empty()) { 
- out.pop(); + let mut idx = out.len(); + while idx > 0 && out[idx - 1].trim().is_empty() { + idx -= 1; } - if let Some(prev) = out.last() { - let lang_owned = prev.trim().to_string(); - if ORPHAN_LANG_RE.is_match(&lang_owned) { - out.pop(); - out.push(format!("{indent}```{}", lang_owned.to_lowercase())); + if idx > 0 { + let candidate = out[idx - 1].trim().to_string(); + if ORPHAN_LANG_RE.is_match(&candidate) + && (idx == 1 || out[idx - 2].trim().is_empty()) + { + out.truncate(idx - 1); + out.push(format!("{indent}```{}", candidate.to_lowercase())); in_fence = true; continue; } diff --git a/tests/cli.rs b/tests/cli.rs index 9bc63548..d6433888 100644 --- a/tests/cli.rs +++ b/tests/cli.rs @@ -253,6 +253,22 @@ fn test_cli_fences_orphan_specifier_symbols() { ); } +#[test] +fn test_cli_no_attach_without_preceding_blank_line() { + let input = concat!("text\n", "Rust\n", "```\n", "fn main() {}\n", "```\n"); + let output = Command::cargo_bin("mdtablefix") + .expect("Failed to create cargo command for mdtablefix") + .arg("--fences") + .write_stdin(input) + .output() + .expect("Failed to execute mdtablefix command"); + assert!(output.status.success()); + assert_eq!( + String::from_utf8_lossy(&output.stdout), + "text\nRust\n```\nfn main() {}\n```\n", + ); +} + /// Tests the CLI `--footnotes` option to convert bare footnote links. 
#[test] fn test_cli_footnotes_option() { diff --git a/tests/fences.rs b/tests/fences.rs index c4e22e25..babafc85 100644 --- a/tests/fences.rs +++ b/tests/fences.rs @@ -78,6 +78,7 @@ fn fixes_multiple_orphaned_specifiers() { "```", "fn main() {}", "```", + "", "Python", "```", "print('hi')", @@ -90,6 +91,7 @@ fn fixes_multiple_orphaned_specifiers() { "```rust", "fn main() {}", "```", + "", "```python", "print('hi')", "```" @@ -120,6 +122,16 @@ fn does_not_overwrite_existing_fence() { assert_eq!(out, lines_vec!["ruby", "```rust", "fn main() {}", "```"]); } +#[test] +fn does_not_attach_specifier_without_preceding_blank_line() { + let input = lines_vec!["intro", "Rust", "```", "fn main() {}", "```"]; + let out = attach_orphan_specifiers(&compress_fences(&input)); + assert_eq!( + out, + lines_vec!["intro", "Rust", "```", "fn main() {}", "```"] + ); +} + #[test] fn attaches_orphan_specifier_with_symbols() { let input = lines_vec!["C++", "```", "fn main() {}", "```"]; From da71c35a5bcac72fa2d28740e3978a2e2111f635 Mon Sep 17 00:00:00 2001 From: Leynos Date: Mon, 21 Jul 2025 14:18:03 +0100 Subject: [PATCH 4/4] Fix markdownlint blank line --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 85cc9a2e..3e2432d0 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,6 @@ cargo install --path . ## Command-line usage - ```bash mdtablefix [--wrap] [--renumber] [--breaks] [--ellipsis] [--fences] [--footnotes] [--in-place] [FILE...] ```