From 5f50be7ad5af39964bc5ff7344b145e39ed3646b Mon Sep 17 00:00:00 2001 From: Leynos Date: Sun, 20 Jul 2025 15:34:11 +0100 Subject: [PATCH 1/5] Document and refactor process module --- src/process.rs | 215 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 168 insertions(+), 47 deletions(-) diff --git a/src/process.rs b/src/process.rs index 209ca989..7f944304 100644 --- a/src/process.rs +++ b/src/process.rs @@ -12,23 +12,141 @@ use crate::{ /// Column width used when wrapping text. pub(crate) const WRAP_COLS: usize = 80; -/// Processing options controlling the behaviour of `process_stream_inner`. +/// Processing options controlling the behaviour of [`process_stream_inner`]. +/// +/// # Examples +/// +/// ``` +/// use mdtablefix::process::{Options, process_stream_opts}; +/// +/// let lines = vec!["example".to_string()]; +/// let opts = Options { +/// wrap: false, +/// ellipsis: false, +/// fences: false, +/// footnotes: false, +/// }; +/// let out = process_stream_opts(&lines, opts); +/// assert_eq!(out, vec!["example"]); +/// ``` #[expect( clippy::struct_excessive_bools, reason = "Options map directly to CLI flags" )] #[derive(Clone, Copy, Default)] pub struct Options { - /// Enable paragraph wrapping + /// Enable paragraph wrapping. pub wrap: bool, - /// Replace `...` with `…` + /// Replace `...` with `…`. pub ellipsis: bool, - /// Normalise code block fences + /// Normalise code block fences. pub fences: bool, - /// Convert bare numeric references to footnotes + /// Convert bare numeric references to footnotes. pub footnotes: bool, } +/// Flushes buffered lines to `out`, formatting as a table when required. +/// +/// ```no_run +/// # use mdtablefix::table::reflow_table; +/// # fn flush_buffer(buf: &mut Vec, in_table: &mut bool, out: &mut Vec) { +/// # if buf.is_empty() { return; } +/// # if *in_table { out.extend(reflow_table(buf)); buf.clear(); } else { out.extend(buf.drain(..)); } +/// # *in_table = false; +/// # } +/// let mut buf = vec!["| a | b |".to_string()]; +/// let mut out = Vec::new(); +/// let mut in_table = true; +/// flush_buffer(&mut buf, &mut in_table, &mut out); +/// assert!(buf.is_empty()); +/// ``` +fn flush_buffer(buf: &mut Vec, in_table: &mut bool, out: &mut Vec) { + if buf.is_empty() { + return; + } + if *in_table { + out.extend(reflow_table(buf)); + buf.clear(); + } else { + out.append(buf); + } + *in_table = false; +} + +/// Detects fence lines and toggles code mode, flushing buffered content. +/// +/// ```no_run +/// # use mdtablefix::process::{handle_fence_line, flush_buffer}; +/// # use mdtablefix::table::reflow_table; +/// # fn flush_buffer(_: &mut Vec, _: &mut bool, _: &mut Vec) {} +/// let mut out = Vec::new(); +/// let mut buf = Vec::new(); +/// let mut in_code = false; +/// let mut in_table = false; +/// handle_fence_line("```", &mut buf, &mut in_code, &mut in_table, &mut out); +/// assert!(in_code); +/// ``` +fn handle_fence_line( + line: &str, + buf: &mut Vec, + in_code: &mut bool, + in_table: &mut bool, + out: &mut Vec, +) -> bool { + if wrap::is_fence(line) { + flush_buffer(buf, in_table, out); + *in_code = !*in_code; + out.push(line.to_string()); + return true; + } + false +} + +/// Buffers table lines, returning `true` when a line was consumed. +/// +/// ```no_run +/// # use mdtablefix::process::handle_table_line; +/// let mut buf = Vec::new(); +/// let mut in_table = false; +/// assert!(handle_table_line("| a | b |", &mut buf, &mut in_table)); +/// assert!(in_table); +/// ``` +fn handle_table_line(line: &str, buf: &mut Vec, in_table: &mut bool) -> bool { + if line.trim_start().starts_with('|') { + *in_table = true; + buf.push(line.trim_end().to_string()); + return true; + } + if *in_table && !line.trim().is_empty() { + buf.push(line.trim_end().to_string()); + return true; + } + false +} + +/// Processes a stream of Markdown lines using the provided [`Options`]. +/// +/// The function normalises code fences, converts HTML tables, detects +/// Markdown tables and optionally wraps paragraphs. The exact behaviour is +/// controlled by `opts`. +/// +/// # Examples +/// +/// ``` +/// use mdtablefix::process::{Options, process_stream_inner}; +/// +/// let lines = vec!["| a | b |".to_string(), "|---|---|".to_string()]; +/// let out = process_stream_inner( +/// &lines, +/// Options { +/// wrap: false, +/// ellipsis: false, +/// fences: false, +/// footnotes: false, +/// }, +/// ); +/// assert!(out.iter().any(|l| l.contains("| a | b |"))); +/// ``` #[must_use] pub fn process_stream_inner(lines: &[String], opts: Options) -> Vec { let lines = if opts.fences { @@ -46,17 +164,7 @@ pub fn process_stream_inner(lines: &[String], opts: Options) -> Vec { let mut in_table = false; for line in &pre { - if wrap::is_fence(line) { - if !buf.is_empty() { - if in_table { - out.extend(reflow_table(&buf)); - } else { - out.extend(buf.clone()); - } - buf.clear(); - } - in_code = !in_code; - out.push(line.to_string()); + if handle_fence_line(line, &mut buf, &mut in_code, &mut in_table, &mut out) { continue; } @@ -65,39 +173,15 @@ pub fn process_stream_inner(lines: &[String], opts: Options) -> Vec { continue; } - if line.trim_start().starts_with('|') { - if !in_table { - in_table = true; - } - buf.push(line.trim_end().to_string()); + if handle_table_line(line, &mut buf, &mut in_table) { continue; } - if in_table && !line.trim().is_empty() { - buf.push(line.trim_end().to_string()); - continue; - } - - if !buf.is_empty() { - if in_table { - out.extend(reflow_table(&buf)); - } else { - out.extend(buf.clone()); - } - buf.clear(); - in_table = false; - } - + flush_buffer(&mut buf, &mut in_table, &mut out); out.push(line.to_string()); } - if !buf.is_empty() { - if in_table { - out.extend(reflow_table(&buf)); - } else { - out.extend(buf); - } - } + flush_buffer(&mut buf, &mut in_table, &mut out); let mut out = if opts.wrap { wrap_text(&out, WRAP_COLS) @@ -114,6 +198,20 @@ pub fn process_stream_inner(lines: &[String], opts: Options) -> Vec { } #[must_use] +/// Processes a Markdown stream with all default options enabled. +/// +/// This is the primary convenience function used by the command-line +/// interface. Paragraphs are wrapped and tables are reflowed. +/// +/// # Examples +/// +/// ``` +/// use mdtablefix::process::process_stream; +/// +/// let lines = vec!["| a | b |".to_string(), "|---|---|".to_string()]; +/// let out = process_stream(&lines); +/// assert!(out.iter().any(|l| l.contains("| a | b |"))); +/// ``` pub fn process_stream(lines: &[String]) -> Vec { process_stream_inner( lines, @@ -124,13 +222,17 @@ pub fn process_stream(lines: &[String]) -> Vec { ) } -/// Process a Markdown stream without wrapping paragraphs. +/// Processes Markdown without wrapping paragraphs. +/// +/// Useful when only table reflow and code fence normalisation are required. +/// +/// # Examples /// /// ``` -/// use mdtablefix::process_stream_no_wrap; -/// let lines = vec!["one".to_string(), "two".to_string()]; +/// use mdtablefix::process::process_stream_no_wrap; +/// let lines = vec!["| a | b |".to_string(), "|---|---|".to_string()]; /// let out = process_stream_no_wrap(&lines); -/// assert_eq!(out, lines); +/// assert!(out.iter().any(|l| l.contains("| a | b |"))); /// ``` #[must_use] #[inline] @@ -139,6 +241,25 @@ pub fn process_stream_no_wrap(lines: &[String]) -> Vec { } #[must_use] +/// Runs [`process_stream_inner`] with custom [`Options`]. +/// +/// This is exposed for advanced use cases where callers want precise +/// control over the processing pipeline. +/// +/// # Examples +/// +/// ``` +/// use mdtablefix::process::{Options, process_stream_opts}; +/// let lines = vec!["text".to_string()]; +/// let opts = Options { +/// wrap: false, +/// ellipsis: false, +/// fences: false, +/// footnotes: false, +/// }; +/// let out = process_stream_opts(&lines, opts); +/// assert_eq!(out, vec!["text"]); +/// ``` pub fn process_stream_opts(lines: &[String], opts: Options) -> Vec { process_stream_inner(lines, opts) } From 86900f82d187a1985028221286b5d9d9d29a9a21 Mon Sep 17 00:00:00 2001 From: Leynos Date: Sun, 20 Jul 2025 16:10:50 +0100 Subject: [PATCH 2/5] Improve table line detection --- src/process.rs | 39 +++++++++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/src/process.rs b/src/process.rs index 7f944304..e8e32a3b 100644 --- a/src/process.rs +++ b/src/process.rs @@ -107,17 +107,48 @@ fn handle_fence_line( /// ```no_run /// # use mdtablefix::process::handle_table_line; /// let mut buf = Vec::new(); +/// let mut out = Vec::new(); /// let mut in_table = false; -/// assert!(handle_table_line("| a | b |", &mut buf, &mut in_table)); +/// assert!(handle_table_line( +/// "| a | b |", +/// &mut buf, +/// &mut in_table, +/// &mut out +/// )); /// assert!(in_table); /// ``` -fn handle_table_line(line: &str, buf: &mut Vec, in_table: &mut bool) -> bool { +fn handle_table_line( + line: &str, + buf: &mut Vec, + in_table: &mut bool, + out: &mut Vec, +) -> bool { if line.trim_start().starts_with('|') { *in_table = true; buf.push(line.trim_end().to_string()); return true; } - if *in_table && !line.trim().is_empty() { + if line.trim().is_empty() { + if *in_table { + flush_buffer(buf, in_table, out); + } + return false; + } + if *in_table && (line.contains('|') || crate::table::SEP_RE.is_match(line.trim())) { + buf.push(line.trim_end().to_string()); + return true; + } + if *in_table { + let trimmed = line.trim_start(); + let new_block = trimmed.starts_with('#') + || trimmed.starts_with('*') + || trimmed.starts_with('-') + || trimmed.starts_with('>') + || trimmed.chars().next().is_some_and(|c| c.is_ascii_digit()); + if new_block { + flush_buffer(buf, in_table, out); + return false; + } buf.push(line.trim_end().to_string()); return true; } @@ -173,7 +204,7 @@ pub fn process_stream_inner(lines: &[String], opts: Options) -> Vec { continue; } - if handle_table_line(line, &mut buf, &mut in_table) { + if handle_table_line(line, &mut buf, &mut in_table, &mut out) { continue; } From a8d92718e99ca29aa382c05d0f926e1c70f5645d Mon Sep 17 00:00:00 2001 From: Leynos Date: Sun, 20 Jul 2025 22:34:14 +0100 Subject: [PATCH 3/5] Refine helper docs and attribute order --- src/process.rs | 48 +++++------------------------------------------- 1 file changed, 5 insertions(+), 43 deletions(-) diff --git a/src/process.rs b/src/process.rs index e8e32a3b..fd39448e 100644 --- a/src/process.rs +++ b/src/process.rs @@ -46,20 +46,7 @@ pub struct Options { } /// Flushes buffered lines to `out`, formatting as a table when required. -/// -/// ```no_run -/// # use mdtablefix::table::reflow_table; -/// # fn flush_buffer(buf: &mut Vec, in_table: &mut bool, out: &mut Vec) { -/// # if buf.is_empty() { return; } -/// # if *in_table { out.extend(reflow_table(buf)); buf.clear(); } else { out.extend(buf.drain(..)); } -/// # *in_table = false; -/// # } -/// let mut buf = vec!["| a | b |".to_string()]; -/// let mut out = Vec::new(); -/// let mut in_table = true; -/// flush_buffer(&mut buf, &mut in_table, &mut out); -/// assert!(buf.is_empty()); -/// ``` +#[allow(clippy::extend_with_drain)] // maintain consistency across helpers fn flush_buffer(buf: &mut Vec, in_table: &mut bool, out: &mut Vec) { if buf.is_empty() { return; @@ -68,24 +55,12 @@ fn flush_buffer(buf: &mut Vec, in_table: &mut bool, out: &mut Vec, _: &mut bool, _: &mut Vec) {} -/// let mut out = Vec::new(); -/// let mut buf = Vec::new(); -/// let mut in_code = false; -/// let mut in_table = false; -/// handle_fence_line("```", &mut buf, &mut in_code, &mut in_table, &mut out); -/// assert!(in_code); -/// ``` fn handle_fence_line( line: &str, buf: &mut Vec, @@ -103,20 +78,6 @@ fn handle_fence_line( } /// Buffers table lines, returning `true` when a line was consumed. -/// -/// ```no_run -/// # use mdtablefix::process::handle_table_line; -/// let mut buf = Vec::new(); -/// let mut out = Vec::new(); -/// let mut in_table = false; -/// assert!(handle_table_line( -/// "| a | b |", -/// &mut buf, -/// &mut in_table, -/// &mut out -/// )); -/// assert!(in_table); -/// ``` fn handle_table_line( line: &str, buf: &mut Vec, @@ -228,8 +189,8 @@ pub fn process_stream_inner(lines: &[String], opts: Options) -> Vec { out } -#[must_use] /// Processes a Markdown stream with all default options enabled. +#[must_use] /// /// This is the primary convenience function used by the command-line /// interface. Paragraphs are wrapped and tables are reflowed. @@ -254,6 +215,7 @@ pub fn process_stream(lines: &[String]) -> Vec { } /// Processes Markdown without wrapping paragraphs. +#[must_use] /// /// Useful when only table reflow and code fence normalisation are required. /// @@ -271,8 +233,8 @@ pub fn process_stream_no_wrap(lines: &[String]) -> Vec { process_stream_inner(lines, Options::default()) } -#[must_use] /// Runs [`process_stream_inner`] with custom [`Options`]. +#[must_use] /// /// This is exposed for advanced use cases where callers want precise /// control over the processing pipeline. From c544dfa420932642647245d0f2431cc7fbb55b78 Mon Sep 17 00:00:00 2001 From: Leynos Date: Sun, 20 Jul 2025 23:59:52 +0100 Subject: [PATCH 4/5] Remove unused lint allow --- src/process.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/process.rs b/src/process.rs index fd39448e..ab7d64bd 100644 --- a/src/process.rs +++ b/src/process.rs @@ -46,7 +46,6 @@ pub struct Options { } /// Flushes buffered lines to `out`, formatting as a table when required. -#[allow(clippy::extend_with_drain)] // maintain consistency across helpers fn flush_buffer(buf: &mut Vec, in_table: &mut bool, out: &mut Vec) { if buf.is_empty() { return; @@ -55,7 +54,7 @@ fn flush_buffer(buf: &mut Vec, in_table: &mut bool, out: &mut Vec Date: Mon, 21 Jul 2025 00:39:04 +0100 Subject: [PATCH 5/5] Place must_use after docs --- src/process.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/process.rs b/src/process.rs index ab7d64bd..8d6ac124 100644 --- a/src/process.rs +++ b/src/process.rs @@ -189,7 +189,6 @@ pub fn process_stream_inner(lines: &[String], opts: Options) -> Vec { } /// Processes a Markdown stream with all default options enabled. -#[must_use] /// /// This is the primary convenience function used by the command-line /// interface. Paragraphs are wrapped and tables are reflowed. @@ -203,6 +202,7 @@ pub fn process_stream_inner(lines: &[String], opts: Options) -> Vec { /// let out = process_stream(&lines); /// assert!(out.iter().any(|l| l.contains("| a | b |"))); /// ``` +#[must_use] pub fn process_stream(lines: &[String]) -> Vec { process_stream_inner( lines, @@ -214,7 +214,6 @@ pub fn process_stream(lines: &[String]) -> Vec { } /// Processes Markdown without wrapping paragraphs. -#[must_use] /// /// Useful when only table reflow and code fence normalisation are required. /// @@ -233,7 +232,6 @@ pub fn process_stream_no_wrap(lines: &[String]) -> Vec { } /// Runs [`process_stream_inner`] with custom [`Options`]. -#[must_use] /// /// This is exposed for advanced use cases where callers want precise /// control over the processing pipeline. @@ -252,6 +250,7 @@ pub fn process_stream_no_wrap(lines: &[String]) -> Vec { /// let out = process_stream_opts(&lines, opts); /// assert_eq!(out, vec!["text"]); /// ``` +#[must_use] pub fn process_stream_opts(lines: &[String], opts: Options) -> Vec { process_stream_inner(lines, opts) }