Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
206 changes: 159 additions & 47 deletions src/process.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,23 +12,132 @@ use crate::{
/// Column width used when wrapping text.
pub(crate) const WRAP_COLS: usize = 80;

/// Processing options controlling the behaviour of `process_stream_inner`.
/// Processing options controlling the behaviour of [`process_stream_inner`].
///
/// # Examples
///
/// ```
/// use mdtablefix::process::{Options, process_stream_opts};
///
/// let lines = vec!["example".to_string()];
/// let opts = Options {
/// wrap: false,
/// ellipsis: false,
/// fences: false,
/// footnotes: false,
/// };
/// let out = process_stream_opts(&lines, opts);
/// assert_eq!(out, vec!["example"]);
/// ```
#[expect(
clippy::struct_excessive_bools,
reason = "Options map directly to CLI flags"
)]
#[derive(Clone, Copy, Default)]
pub struct Options {
/// Enable paragraph wrapping
/// Enable paragraph wrapping.
pub wrap: bool,
/// Replace `...` with `…`
/// Replace `...` with `…`.
pub ellipsis: bool,
/// Normalise code block fences
/// Normalise code block fences.
pub fences: bool,
/// Convert bare numeric references to footnotes
/// Convert bare numeric references to footnotes.
pub footnotes: bool,
}

/// Flushes buffered lines to `out`, formatting as a table when required.
fn flush_buffer(buf: &mut Vec<String>, in_table: &mut bool, out: &mut Vec<String>) {
if buf.is_empty() {
return;
}
if *in_table {
out.extend(reflow_table(buf));
buf.clear();
} else {
out.extend(std::mem::take(buf));
}
*in_table = false;
}

/// Detects fence lines and toggles code mode, flushing buffered content.
fn handle_fence_line(
line: &str,
buf: &mut Vec<String>,
in_code: &mut bool,
in_table: &mut bool,
out: &mut Vec<String>,
) -> bool {
if wrap::is_fence(line) {
flush_buffer(buf, in_table, out);
*in_code = !*in_code;
out.push(line.to_string());
return true;
Comment thread
leynos marked this conversation as resolved.
}
false
}

/// Buffers table lines, returning `true` when a line was consumed.
fn handle_table_line(
line: &str,
buf: &mut Vec<String>,
in_table: &mut bool,
out: &mut Vec<String>,
) -> bool {
if line.trim_start().starts_with('|') {
*in_table = true;
buf.push(line.trim_end().to_string());
return true;
}
if line.trim().is_empty() {
if *in_table {
flush_buffer(buf, in_table, out);
}
return false;
}
if *in_table && (line.contains('|') || crate::table::SEP_RE.is_match(line.trim())) {
buf.push(line.trim_end().to_string());
return true;
}
if *in_table {
let trimmed = line.trim_start();
let new_block = trimmed.starts_with('#')
|| trimmed.starts_with('*')
|| trimmed.starts_with('-')
|| trimmed.starts_with('>')
|| trimmed.chars().next().is_some_and(|c| c.is_ascii_digit());
if new_block {
flush_buffer(buf, in_table, out);
return false;
}
buf.push(line.trim_end().to_string());
return true;
}
false
}

/// Processes a stream of Markdown lines using the provided [`Options`].
///
/// The function normalises code fences, converts HTML tables, detects
/// Markdown tables and optionally wraps paragraphs. The exact behaviour is
/// controlled by `opts`.
///
/// # Examples
///
/// ```
/// use mdtablefix::process::{Options, process_stream_inner};
///
/// let lines = vec!["| a | b |".to_string(), "|---|---|".to_string()];
/// let out = process_stream_inner(
/// &lines,
/// Options {
/// wrap: false,
/// ellipsis: false,
/// fences: false,
/// footnotes: false,
/// },
/// );
/// assert!(out.iter().any(|l| l.contains("| a | b |")));
/// ```
#[must_use]
pub fn process_stream_inner(lines: &[String], opts: Options) -> Vec<String> {
let lines = if opts.fences {
Expand All @@ -46,17 +155,7 @@ pub fn process_stream_inner(lines: &[String], opts: Options) -> Vec<String> {
let mut in_table = false;

for line in &pre {
if wrap::is_fence(line) {
if !buf.is_empty() {
if in_table {
out.extend(reflow_table(&buf));
} else {
out.extend(buf.clone());
}
buf.clear();
}
in_code = !in_code;
out.push(line.to_string());
if handle_fence_line(line, &mut buf, &mut in_code, &mut in_table, &mut out) {
continue;
}

Expand All @@ -65,39 +164,15 @@ pub fn process_stream_inner(lines: &[String], opts: Options) -> Vec<String> {
continue;
}

if line.trim_start().starts_with('|') {
if !in_table {
in_table = true;
}
buf.push(line.trim_end().to_string());
continue;
}

if in_table && !line.trim().is_empty() {
buf.push(line.trim_end().to_string());
if handle_table_line(line, &mut buf, &mut in_table, &mut out) {
continue;
}

if !buf.is_empty() {
if in_table {
out.extend(reflow_table(&buf));
} else {
out.extend(buf.clone());
}
buf.clear();
in_table = false;
}

flush_buffer(&mut buf, &mut in_table, &mut out);
out.push(line.to_string());
}

if !buf.is_empty() {
if in_table {
out.extend(reflow_table(&buf));
} else {
out.extend(buf);
}
}
flush_buffer(&mut buf, &mut in_table, &mut out);

let mut out = if opts.wrap {
wrap_text(&out, WRAP_COLS)
Expand All @@ -113,6 +188,20 @@ pub fn process_stream_inner(lines: &[String], opts: Options) -> Vec<String> {
out
}

/// Processes a Markdown stream with all default options enabled.
///
/// This is the primary convenience function used by the command-line
/// interface. Paragraphs are wrapped and tables are reflowed.
///
/// # Examples
///
/// ```
/// use mdtablefix::process::process_stream;
///
/// let lines = vec!["| a | b |".to_string(), "|---|---|".to_string()];
/// let out = process_stream(&lines);
/// assert!(out.iter().any(|l| l.contains("| a | b |")));
/// ```
#[must_use]
pub fn process_stream(lines: &[String]) -> Vec<String> {
process_stream_inner(
Expand All @@ -124,20 +213,43 @@ pub fn process_stream(lines: &[String]) -> Vec<String> {
)
}

/// Process a Markdown stream without wrapping paragraphs.
/// Processes Markdown without wrapping paragraphs.
///
/// Useful when only table reflow and code fence normalisation are required.
///
/// # Examples
///
/// ```
/// use mdtablefix::process_stream_no_wrap;
/// let lines = vec!["one".to_string(), "two".to_string()];
/// use mdtablefix::process::process_stream_no_wrap;
/// let lines = vec!["| a | b |".to_string(), "|---|---|".to_string()];
/// let out = process_stream_no_wrap(&lines);
/// assert_eq!(out, lines);
/// assert!(out.iter().any(|l| l.contains("| a | b |")));
/// ```
#[must_use]
#[inline]
pub fn process_stream_no_wrap(lines: &[String]) -> Vec<String> {
process_stream_inner(lines, Options::default())
}

/// Runs [`process_stream_inner`] with custom [`Options`].
///
/// This is exposed for advanced use cases where callers want precise
/// control over the processing pipeline.
///
/// # Examples
///
/// ```
/// use mdtablefix::process::{Options, process_stream_opts};
/// let lines = vec!["text".to_string()];
/// let opts = Options {
/// wrap: false,
/// ellipsis: false,
/// fences: false,
/// footnotes: false,
/// };
/// let out = process_stream_opts(&lines, opts);
/// assert_eq!(out, vec!["text"]);
/// ```
#[must_use]
pub fn process_stream_opts(lines: &[String], opts: Options) -> Vec<String> {
process_stream_inner(lines, opts)
Expand Down