Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 40 additions & 2 deletions src/html.rs
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,9 @@ fn table_lines_to_markdown(lines: &[String]) -> Vec<String> {
}

/// Buffers a single line of HTML, updating nesting depth and emitting completed
/// tables when an end tag is encountered.
/// Buffers a line of HTML table markup and processes the buffer into Markdown when the table is fully closed.
///
/// Tracks the nesting depth of `<table>` tags, appending each line to the buffer. When all opened tables are closed (depth reaches zero), converts the buffered HTML table lines to Markdown and appends them to the output vector. Resets the buffer and updates the HTML state accordingly.
fn push_html_line(
line: &str,
buf: &mut Vec<String>,
Expand All @@ -196,7 +198,27 @@ fn push_html_line(
}
}

/// Converts any HTML tables in `lines` to Markdown syntax.
/// Replaces HTML tables in the provided lines with equivalent Markdown table syntax.
///
/// Scans the input lines for HTML `<table>` blocks, converts each detected table to Markdown using `table_lines_to_markdown`, and preserves all other content unchanged. Handles nested tables and maintains original line formatting outside of tables.
///
/// # Arguments
///
/// * `lines` - A slice of strings representing lines of Markdown, possibly containing HTML tables.
///
/// # Returns
///
/// A vector of strings with HTML tables replaced by Markdown tables, leaving other lines intact.
///
/// # Examples
///
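/// ```ignore
/// // `html_table_to_markdown` is crate-private, so this example cannot be compiled as a doctest.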
/// let html_lines = vec![
/// "<table><tr><th>Header</th></tr><tr><td>Cell</td></tr></table>".to_string()
/// ];
/// let md_lines = html_table_to_markdown(&html_lines);
/// assert!(md_lines[0].starts_with("| Header |"));
/// ```
pub(crate) fn html_table_to_markdown(lines: &[String]) -> Vec<String> {
let mut out = Vec::new();
let mut buf = Vec::new();
Expand Down Expand Up @@ -231,6 +253,22 @@ pub(crate) fn html_table_to_markdown(lines: &[String]) -> Vec<String> {
/// Fenced code blocks are left untouched, allowing raw HTML examples to be
/// documented without modification.
#[must_use]
/// Converts HTML tables embedded in Markdown lines to Markdown table syntax.
///
/// Scans the input lines, detects HTML table blocks outside of fenced code blocks, and replaces them with equivalent Markdown tables. Fenced code blocks are left unmodified. Handles nested tables and preserves original line formatting outside of tables.
///
/// # Examples
///
/// ```
/// let lines = vec![
/// "<table>".to_string(),
/// " <tr><th>Header</th></tr>".to_string(),
/// " <tr><td>Cell</td></tr>".to_string(),
/// "</table>".to_string(),
/// ];
/// let result = convert_html_tables(&lines);
/// assert!(result[0].starts_with("| Header |"));
/// ```
pub fn convert_html_tables(lines: &[String]) -> Vec<String> {
let mut out = Vec::new();
let mut buf = Vec::new();
Expand Down
66 changes: 66 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -249,10 +249,22 @@ static FENCE_RE: std::sync::LazyLock<Regex> =
/// Pattern for a list-item line, compiled on first use.
///
/// Capture group 1 is the item prefix: optional leading whitespace, then either a bullet
/// character (`-`, `*` or `+`) or a run of digits followed by `.` or `)`, then at least one
/// whitespace character. Capture group 2 is the remainder of the line.
///
/// The `unwrap` can only panic if the literal pattern itself fails to compile.
static BULLET_RE: std::sync::LazyLock<Regex> =
std::sync::LazyLock::new(|| Regex::new(r"^(\s*(?:[-*+]|\d+[.)])\s+)(.*)").unwrap());

/// Returns `true` if the line is a fenced code block delimiter (e.g., "```" or "~~~").
///
/// The decision is delegated entirely to the module-level `FENCE_RE` pattern: the result is
/// whatever `FENCE_RE.is_match(line)` reports.
///
/// # Examples
///
/// ```ignore
/// // `is_fence` is crate-private, so this example cannot be compiled as a doctest.
/// assert!(is_fence("```"));
/// assert!(is_fence("~~~"));
/// assert!(!is_fence("| foo | bar |"));
/// ```
pub(crate) fn is_fence(line: &str) -> bool {
FENCE_RE.is_match(line)
}

/// Flushes a buffered paragraph to the output, wrapping text to the specified width and applying indentation.
///
/// Concatenates buffered lines into a single paragraph, respecting hard line breaks, and writes the wrapped lines to the output vector with the given indentation. Lines are wrapped to the specified width minus the indentation length. Hard breaks in the buffer force a line break at that point.
fn flush_paragraph(out: &mut Vec<String>, buf: &[(String, bool)], indent: &str, width: usize) {
if buf.is_empty() {
return;
Expand All @@ -277,6 +289,37 @@ fn flush_paragraph(out: &mut Vec<String>, buf: &[(String, bool)], indent: &str,
}
}

/// Wraps text lines to a specified width, preserving markdown structure.
///
/// Paragraphs and list items are reflowed to the given width, while code blocks, tables, headers, and blank lines are left unchanged. Indentation and bullet/numbered list prefixes are preserved. Hard line breaks (two spaces or `<br>` tags) are respected.
///
/// # Parameters
/// - `lines`: The input lines of markdown text.
/// - `width`: The maximum line width for wrapping.
///
/// # Returns
/// A vector of strings containing the wrapped and formatted markdown lines.
///
/// # Examples
///
/// ```ignore
/// // `wrap_text` is private, so this example cannot be compiled as a doctest.
/// let input = vec![
/// "This is a long paragraph that should be wrapped to a shorter width.".to_string(),
/// "".to_string(),
/// "```".to_string(),
/// "let x = 42;".to_string(),
/// "```".to_string(),
/// ];
/// let wrapped = wrap_text(&input, 20);
/// // The paragraph is reflowed onto several lines, none longer than the requested width...
/// let blank = wrapped.iter().position(|l| l.is_empty()).unwrap();
/// assert!(blank > 1);
/// assert!(wrapped[..blank].iter().all(|l| l.len() <= 20));
/// // ...while the blank line and the fenced code block pass through unchanged.
/// assert_eq!(wrapped[blank + 1..], ["```", "let x = 42;", "```"]);
/// ```
fn wrap_text(lines: &[String], width: usize) -> Vec<String> {
let mut out = Vec::new();
let mut buf: Vec<(String, bool)> = Vec::new();
Expand Down Expand Up @@ -362,6 +405,29 @@ fn wrap_text(lines: &[String], width: usize) -> Vec<String> {
}

#[must_use]
/// Processes a stream of markdown lines, converting HTML tables, reflowing markdown tables, and wrapping text to 80 columns.
///
/// Converts simple HTML tables to markdown, reflows markdown tables for consistent alignment, and wraps paragraphs and list items to 80 characters. Preserves code blocks, headers, and special markdown structures.
///
/// # Returns
///
/// A vector of processed markdown lines with tables fixed and text wrapped.
///
/// # Examples
///
/// ```
/// let input = vec![
/// "<table><tr><td>foo</td><td>bar</td></tr></table>".to_string(),
/// "| a | b |".to_string(),
/// "|---|---|".to_string(),
/// "| 1 | 2 |".to_string(),
/// "".to_string(),
/// "A paragraph that will be wrapped to fit within eighty columns. This sentence is intentionally long to demonstrate wrapping.".to_string(),
/// ];
/// let output = process_stream(&input);
/// assert!(output.iter().any(|line| line.contains("| foo | bar |")));
/// assert!(output.iter().all(|line| line.len() <= 80));
/// ```
pub fn process_stream(lines: &[String]) -> Vec<String> {
let pre = html::convert_html_tables(lines);

Expand Down
21 changes: 21 additions & 0 deletions tests/integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,10 @@ fn test_logical_type_table_output_matches() {
}

#[test]
/// Verifies that reflowing the option table input produces the expected output.
///
/// Loads the input and expected output from external files and asserts that the
/// `reflow_table` function transforms the input table to match the expected result.
fn test_option_table_output_matches() {
let input: Vec<String> = include_str!("data/option_table_input.txt")
.lines()
Expand All @@ -489,6 +493,9 @@ fn test_option_table_output_matches() {
}

#[test]
/// Tests that long paragraphs are wrapped at 80 columns by `process_stream`.
///
/// Ensures that a single long paragraph is split into multiple lines, each not exceeding 80 characters.
fn test_wrap_paragraph() {
let input = vec![
"This is a very long paragraph that should be wrapped at eighty columns \
Expand Down Expand Up @@ -518,13 +525,27 @@ fn test_wrap_list_item() {
}

/// Checks that a short, single-line bullet item comes back from `process_stream` unchanged.
///
/// The input consists of the lone line `- short item`; the output must equal the input
/// exactly, i.e. the item is neither re-wrapped nor otherwise altered.
#[test]
fn test_wrap_short_list_item() {
    let expected = vec![String::from("- short item")];
    let actual = process_stream(&expected);
    assert_eq!(actual, expected);
}

#[test]
/// Tests that lines with hard line breaks (trailing spaces) are preserved after processing.
///
/// Ensures that the `process_stream` function does not remove or alter lines ending with Markdown hard line breaks.
fn test_preserve_hard_line_breaks() {
let input = vec![
"Line one with break. ".to_string(),
Expand Down