Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ jobs:
restore-keys: |
${{ runner.os }}-cargo-
- name: Format
run: cargo fmt --all -- --check
run: rustup component add rustfmt --toolchain nightly-2025-06-10 && cargo +nightly-2025-06-10 fmt --all -- --check
Comment thread
leynos marked this conversation as resolved.
- name: Lint
run: cargo clippy --all-targets --all-features -- -D warnings
- name: Test
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ jobs:
restore-keys: |
${{ runner.os }}-cargo-
- name: Build release binary
run: cross build --release --target ${{ matrix.target }}
run: cross +stable build --release --target ${{ matrix.target }}
- name: Prepare artifact
run: |
mkdir -p artifacts/${{ matrix.os }}-${{ matrix.arch }}
Expand Down
14 changes: 14 additions & 0 deletions .rustfmt.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
unstable_features = true
comment_width = 100
format_code_in_doc_comments = true
imports_granularity = "Crate"
imports_layout = "HorizontalVertical"
wrap_comments = true
group_imports = "StdExternalCrate"
use_try_shorthand = true
hex_literal_case = "Lower"
format_strings = true
format_macro_matchers = true
fn_single_line = true
condense_wildcard_suffixes = true
use_field_init_shorthand = true
8 changes: 5 additions & 3 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,10 +94,12 @@ This repository is written in Rust and uses Cargo for building and dependency
management. Contributors should follow these best practices when working on the
project:

- Run cargo fmt, cargo clippy -- -D warnings, and RUSTFLAGS="-D warnings" cargo
test before committing.
- Run `cargo +nightly-2025-06-10 fmt --all` after any change and before
committing. Follow this with `cargo clippy -- -D warnings` and
`RUSTFLAGS="-D warnings" cargo test`.
- Clippy warnings MUST be disallowed.
- Fix any warnings emitted during tests in the code itself rather than silencing them.
- Fix any warnings emitted during tests in the code itself instead of silencing
them.
- Where a function is too long, extract meaningfully named helper functions
adhering to separation of concerns and CQRS.
- Where a function has too many parameters, group related parameters in
Expand Down
3 changes: 3 additions & 0 deletions rust-toolchain.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[toolchain]
channel = "nightly-2025-06-10"
components = ["rustfmt", "clippy"]
62 changes: 34 additions & 28 deletions src/html.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
//! are ignored. The resulting Markdown lines are passed to
//! `reflow_table` to ensure consistent column widths.

use html5ever::driver::ParseOpts;
use html5ever::{parse_document, tendril::TendrilSink};
use std::sync::LazyLock;

use html5ever::{driver::ParseOpts, parse_document, tendril::TendrilSink};
use markup5ever_rcdom::{Handle, NodeData, RcDom};
use regex::Regex;
use std::sync::LazyLock;

use crate::is_fence;

Expand Down Expand Up @@ -71,10 +71,10 @@ fn collect_text(handle: &Handle, out: &mut String, last_space: &mut bool) {

/// Walks the DOM tree collecting `<table>` nodes under `handle`.
fn collect_tables(handle: &Handle, tables: &mut Vec<Handle>) {
if let NodeData::Element { name, .. } = &handle.data {
if name.local.as_ref() == "table" {
tables.push(handle.clone());
}
if let NodeData::Element { name, .. } = &handle.data
&& name.local.as_ref() == "table"
{
tables.push(handle.clone());
}
for child in handle.children.borrow().iter() {
collect_tables(child, tables);
Expand All @@ -83,10 +83,10 @@ fn collect_tables(handle: &Handle, tables: &mut Vec<Handle>) {

/// Collects all `<tr>` nodes beneath `handle`.
fn collect_rows(handle: &Handle, rows: &mut Vec<Handle>) {
if let NodeData::Element { name, .. } = &handle.data {
if name.local.as_ref() == "tr" {
rows.push(handle.clone());
}
if let NodeData::Element { name, .. } = &handle.data
&& name.local.as_ref() == "tr"
{
rows.push(handle.clone());
}
for child in handle.children.borrow().iter() {
collect_rows(child, rows);
Expand Down Expand Up @@ -121,16 +121,16 @@ fn table_node_to_markdown(table: &Handle) -> Vec<String> {
let mut cells = Vec::new();
let mut all_header = true;
for child in row.children.borrow().iter() {
if let NodeData::Element { name, .. } = &child.data {
if name.local.as_ref() == "td" || name.local.as_ref() == "th" {
let is_header = if name.local.as_ref() == "th" {
true
} else {
contains_strong(child)
};
all_header &= is_header;
cells.push(node_text(child));
}
if let NodeData::Element { name, .. } = &child.data
&& (name.local.as_ref() == "td" || name.local.as_ref() == "th")
{
let is_header = if name.local.as_ref() == "th" {
true
} else {
contains_strong(child)
};
all_header &= is_header;
cells.push(node_text(child));
}
}
if i == 0 {
Expand Down Expand Up @@ -178,9 +178,12 @@ fn table_lines_to_markdown(lines: &[String]) -> Vec<String> {
}

/// Buffers a single line of HTML, updating nesting depth and emitting completed
/// Buffers a line of HTML table markup and processes the buffer into Markdown when the table is fully closed.
/// Buffers a line of HTML table markup and processes the buffer into Markdown when the table is
/// fully closed.
///
/// Tracks the nesting depth of `<table>` tags, appending each line to the buffer. When all opened tables are closed (depth reaches zero), converts the buffered HTML table lines to Markdown and appends them to the output vector. Resets the buffer and updates the HTML state accordingly.
/// Tracks the nesting depth of `<table>` tags, appending each line to the buffer. When all opened
/// tables are closed (depth reaches zero), converts the buffered HTML table lines to Markdown and
/// appends them to the output vector. Resets the buffer and updates the HTML state accordingly.
fn push_html_line(
line: &str,
buf: &mut Vec<String>,
Expand All @@ -202,7 +205,9 @@ fn push_html_line(

/// Replaces HTML tables in the provided lines with equivalent Markdown table syntax.
///
/// Scans the input lines for HTML `<table>` blocks, converts each detected table to Markdown using `table_lines_to_markdown`, and preserves all other content unchanged. Handles nested tables and maintains original line formatting outside of tables.
/// Scans the input lines for HTML `<table>` blocks, converts each detected table to Markdown using
/// `table_lines_to_markdown`, and preserves all other content unchanged. Handles nested tables and
/// maintains original line formatting outside of tables.
///
/// # Arguments
///
Expand All @@ -216,9 +221,8 @@ fn push_html_line(
///
/// ```no_run
/// use mdtablefix::html_table_to_markdown;
/// let html_lines = vec![
/// "<table><tr><th>Header</th></tr><tr><td>Cell</td></tr></table>".to_string()
/// ];
/// let html_lines =
/// vec!["<table><tr><th>Header</th></tr><tr><td>Cell</td></tr></table>".to_string()];
/// let md_lines = html_table_to_markdown(&html_lines);
/// assert!(md_lines[0].starts_with("| Header |"));
/// ```
Expand Down Expand Up @@ -258,7 +262,9 @@ pub(crate) fn html_table_to_markdown(lines: &[String]) -> Vec<String> {
#[must_use]
/// Converts HTML tables embedded in Markdown lines to Markdown table syntax.
///
/// Scans the input lines, detects HTML table blocks outside of fenced code blocks, and replaces them with equivalent Markdown tables. Fenced code blocks are left unmodified. Handles nested tables and preserves original line formatting outside of tables.
/// Scans the input lines, detects HTML table blocks outside of fenced code blocks, and replaces
/// them with equivalent Markdown tables. Fenced code blocks are left unmodified. Handles nested
/// tables and preserves original line formatting outside of tables.
///
/// # Examples
///
Expand Down
103 changes: 56 additions & 47 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,16 @@ pub fn html_table_to_markdown(lines: &[String]) -> Vec<String> {
html::html_table_to_markdown(lines)
}

pub use html::convert_html_tables;
use std::{fs, path::Path};

pub use html::convert_html_tables;
use regex::Regex;
use std::fs;
use std::path::Path;
use textwrap::fill;

/// Splits a markdown table line into trimmed cell strings.
///
/// Removes leading and trailing pipe characters, splits the line by pipes, trims whitespace from each cell, and returns the resulting cell strings as a vector.
/// Removes leading and trailing pipe characters, splits the line by pipes, trims whitespace from
/// each cell, and returns the resulting cell strings as a vector.
///
/// # Examples
///
Expand All @@ -47,13 +47,13 @@ pub fn split_cells(line: &str) -> Vec<String> {
let mut chars = s.chars().peekable();
while let Some(ch) = chars.next() {
if ch == '\\' {
if let Some(&next) = chars.peek() {
if next == '|' {
// `\|` escapes the pipe so it becomes part of the cell
chars.next();
current.push('|');
continue;
}
if let Some(&next) = chars.peek()
&& next == '|'
{
// `\|` escapes the pipe so it becomes part of the cell
chars.next();
current.push('|');
continue;
}
current.push(ch);
continue;
Expand Down Expand Up @@ -105,21 +105,20 @@ fn format_separator_cells(widths: &[usize], sep_cells: &[String]) -> Vec<String>
/// Panics if the internal regex fails to compile.
/// Reflows a broken markdown table into properly aligned rows and columns.
///
/// Takes a slice of strings representing lines of a markdown table, reconstructs the table by splitting and aligning cells, and returns the reflowed table as a vector of strings. If the rows have inconsistent numbers of non-empty columns, the original lines are returned unchanged.
/// Takes a slice of strings representing lines of a markdown table, reconstructs the table by
/// splitting and aligning cells, and returns the reflowed table as a vector of strings. If the rows
/// have inconsistent numbers of non-empty columns, the original lines are returned unchanged.
///
/// # Examples
///
/// ```no_run
/// use mdtablefix::reflow_table;
/// let lines = vec![
/// "| a | b |".to_string(),
/// "| c | d |".to_string(),
/// ];
/// let lines = vec!["| a | b |".to_string(), "| c | d |".to_string()];
/// let fixed = reflow_table(&lines);
/// assert_eq!(fixed, vec![
/// "| a | b |".to_string(),
/// "| c | d |".to_string(),
/// ]);
/// assert_eq!(
/// fixed,
/// vec!["| a | b |".to_string(), "| c | d |".to_string(),]
/// );
/// ```
pub(crate) static SEP_RE: std::sync::LazyLock<Regex> =
std::sync::LazyLock::new(|| Regex::new(r"^[\s|:-]+$").unwrap());
Expand Down Expand Up @@ -150,20 +149,18 @@ pub fn reflow_table(lines: &[String]) -> Vec<String> {
let cleaned = reflow::clean_rows(rows);

let mut output_rows = cleaned.clone();
if let Some(idx) = sep_row_idx {
if idx < output_rows.len() {
output_rows.remove(idx);
}
if let Some(idx) = sep_row_idx
&& idx < output_rows.len()
{
output_rows.remove(idx);
}

if !split_within_line {
if let Some(first_len) = cleaned.first().map(Vec::len) {
let mismatch = cleaned[1..]
.iter()
.any(|row| row.len() != first_len && !row.iter().all(|c| SEP_RE.is_match(c)));
if mismatch {
return lines.to_vec();
}
if !split_within_line && let Some(first_len) = cleaned.first().map(Vec::len) {
let mismatch = cleaned[1..]
.iter()
.any(|row| row.len() != first_len && !row.iter().all(|c| SEP_RE.is_match(c)));
if mismatch {
return lines.to_vec();
}
}

Expand All @@ -174,9 +171,12 @@ pub fn reflow_table(lines: &[String]) -> Vec<String> {
reflow::insert_separator(out, sep_cells, &widths, &indent)
}

/// Processes a stream of markdown lines, reflowing tables while preserving code blocks and other content.
/// Processes a stream of markdown lines, reflowing tables while preserving code blocks and other
/// content.
///
/// Detects fenced code blocks and avoids modifying their contents. Buffers lines that appear to be part of a markdown table and reflows them when the table ends. Non-table lines and code blocks are output unchanged.
/// Detects fenced code blocks and avoids modifying their contents. Buffers lines that appear to be
/// part of a markdown table and reflows them when the table ends. Non-table lines and code blocks
/// are output unchanged.
///
/// # Returns
///
Expand Down Expand Up @@ -221,13 +221,15 @@ static BULLET_RE: std::sync::LazyLock<Regex> =
/// assert!(!is_fence("| foo | bar |"));
/// ```
#[doc(hidden)]
pub fn is_fence(line: &str) -> bool {
FENCE_RE.is_match(line)
}
pub fn is_fence(line: &str) -> bool { FENCE_RE.is_match(line) }

/// Flushes a buffered paragraph to the output, wrapping text to the specified width and applying indentation.
/// Flushes a buffered paragraph to the output, wrapping text to the specified width and applying
/// indentation.
///
/// Concatenates buffered lines into a single paragraph, respecting hard line breaks, and writes the wrapped lines to the output vector with the given indentation. Lines are wrapped to the specified width minus the indentation length. Hard breaks in the buffer force a line break at that point.
/// Concatenates buffered lines into a single paragraph, respecting hard line breaks, and writes the
/// wrapped lines to the output vector with the given indentation. Lines are wrapped to the
/// specified width minus the indentation length. Hard breaks in the buffer force a line break at
/// that point.
fn flush_paragraph(out: &mut Vec<String>, buf: &[(String, bool)], indent: &str, width: usize) {
if buf.is_empty() {
return;
Expand All @@ -254,7 +256,9 @@ fn flush_paragraph(out: &mut Vec<String>, buf: &[(String, bool)], indent: &str,

/// Wraps text lines to a specified width, preserving markdown structure.
///
/// Paragraphs and list items are reflowed to the given width, while code blocks, tables, headers, and blank lines are left unchanged. Indentation and bullet/numbered list prefixes are preserved. Hard line breaks (two spaces or `<br>` tags) are respected.
/// Paragraphs and list items are reflowed to the given width, while code blocks, tables, headers,
/// and blank lines are left unchanged. Indentation and bullet/numbered list prefixes are preserved.
/// Hard line breaks (two spaces or `<br>` tags) are respected.
///
/// # Parameters
/// - `lines`: The input lines of markdown text.
Expand Down Expand Up @@ -370,9 +374,12 @@ pub fn wrap_text(lines: &[String], width: usize) -> Vec<String> {
}

#[must_use]
/// Processes a stream of markdown lines, converting HTML tables, reflowing markdown tables, and wrapping text to 80 columns.
/// Processes a stream of markdown lines, converting HTML tables, reflowing markdown tables, and
/// wrapping text to 80 columns.
///
/// Converts simple HTML tables to markdown, reflows markdown tables for consistent alignment, and wraps paragraphs and list items to 80 characters. Preserves code blocks, headers, and special markdown structures.
/// Converts simple HTML tables to markdown, reflows markdown tables for consistent alignment, and
/// wraps paragraphs and list items to 80 characters. Preserves code blocks, headers, and special
/// markdown structures.
///
/// # Returns
///
Expand All @@ -388,7 +395,9 @@ pub fn wrap_text(lines: &[String], width: usize) -> Vec<String> {
/// "|---|---|".to_string(),
/// "| 1 | 2 |".to_string(),
/// "".to_string(),
/// "A paragraph that will be wrapped to fit within eighty columns. This sentence is intentionally long to demonstrate wrapping.".to_string(),
/// "A paragraph that will be wrapped to fit within eighty columns. This sentence is \
/// intentionally long to demonstrate wrapping."
/// .to_string(),
/// ];
/// let output = process_stream(&input);
/// assert!(output.iter().any(|line| line.contains("| foo | bar |")));
Expand Down Expand Up @@ -460,9 +469,7 @@ fn process_stream_inner(lines: &[String], wrap: bool) -> Vec<String> {
}

#[must_use]
pub fn process_stream(lines: &[String]) -> Vec<String> {
process_stream_inner(lines, true)
}
pub fn process_stream(lines: &[String]) -> Vec<String> { process_stream_inner(lines, true) }

#[must_use]
pub fn process_stream_no_wrap(lines: &[String]) -> Vec<String> {
Expand All @@ -472,14 +479,16 @@ pub fn process_stream_no_wrap(lines: &[String]) -> Vec<String> {
/// Rewrite a file in place with fixed tables.
///
/// # Errors
/// Reads a markdown file, reflows any broken tables within it, and writes the updated content back to the same file.
/// Reads a markdown file, reflows any broken tables within it, and writes the updated content back
/// to the same file.
///
/// Returns an error if the file cannot be read or written.
///
/// # Examples
///
/// ```no_run
/// use std::path::Path;
///
/// use mdtablefix::rewrite;
/// let path = Path::new("example.md");
/// rewrite(path).unwrap();
Expand Down
Loading