diff --git a/src/html.rs b/src/html.rs index f618ef5d..b491d7ca 100644 --- a/src/html.rs +++ b/src/html.rs @@ -1,3 +1,11 @@ +//! Utilities for converting HTML tables embedded in Markdown into +//! Markdown table syntax. +//! +//! The conversion is intentionally simple: only ``, ``, +//! `
`, and `` tags are recognised. Attributes and tag casing +//! are ignored. The resulting Markdown lines are passed to +//! `reflow_table` to ensure consistent column widths. + use html5ever::driver::ParseOpts; use html5ever::{parse_document, tendril::TendrilSink}; use markup5ever_rcdom::{Handle, NodeData, RcDom}; @@ -10,6 +18,8 @@ static TABLE_START_RE: LazyLock = LazyLock::new(|| Regex::new(r"(?i)^|$)").unwrap()); static TABLE_END_RE: LazyLock = LazyLock::new(|| Regex::new(r"(?i)
").unwrap()); +/// Extracts the text content of a DOM node, collapsing consecutive +/// whitespace to single spaces. fn node_text(handle: &Handle) -> String { let mut out = String::new(); let mut last_space = false; @@ -17,6 +27,8 @@ fn node_text(handle: &Handle) -> String { out.trim().to_string() } +/// Recursively appends text nodes from `handle` to `out`, tracking whether the +/// previous output was whitespace. fn collect_text(handle: &Handle, out: &mut String, last_space: &mut bool) { match &handle.data { NodeData::Text { contents } => { @@ -55,6 +67,7 @@ fn collect_text(handle: &Handle, out: &mut String, last_space: &mut bool) { } } +/// Walks the DOM tree collecting `` nodes under `handle`. fn collect_tables(handle: &Handle, tables: &mut Vec) { if let NodeData::Element { name, .. } = &handle.data { if name.local.as_ref() == "table" { @@ -66,6 +79,7 @@ fn collect_tables(handle: &Handle, tables: &mut Vec) { } } +/// Collects all `` nodes beneath `handle`. fn collect_rows(handle: &Handle, rows: &mut Vec) { if let NodeData::Element { name, .. } = &handle.data { if name.local.as_ref() == "tr" { @@ -77,6 +91,8 @@ fn collect_rows(handle: &Handle, rows: &mut Vec) { } } +/// Converts a `
` DOM node into Markdown table lines and calls +/// `reflow_table` so the columns are uniformly padded. fn table_node_to_markdown(table: &Handle) -> Vec { let mut row_handles = Vec::new(); collect_rows(table, &mut row_handles); @@ -111,6 +127,9 @@ fn table_node_to_markdown(table: &Handle) -> Vec { crate::reflow_table(&out) } +/// Parses HTML table markup and returns the equivalent Markdown lines. +/// +/// If no `
` elements are present, the input is returned unchanged. fn table_lines_to_markdown(lines: &[String]) -> Vec { let indent: String = lines .first() @@ -139,6 +158,8 @@ fn table_lines_to_markdown(lines: &[String]) -> Vec { out } +/// Buffers a single line of HTML, updating nesting depth and emitting completed +/// tables when an end tag is encountered. fn push_html_line( line: &str, buf: &mut Vec, @@ -158,6 +179,7 @@ fn push_html_line( } } +/// Converts any HTML tables in `lines` to Markdown syntax. pub(crate) fn html_table_to_markdown(lines: &[String]) -> Vec { let mut out = Vec::new(); let mut buf = Vec::new(); @@ -187,6 +209,11 @@ pub(crate) fn html_table_to_markdown(lines: &[String]) -> Vec { out } +/// Processes Markdown lines and converts embedded HTML tables to Markdown. +/// +/// Fenced code blocks are left untouched, allowing raw HTML examples to be +/// documented without modification. +#[must_use] pub fn convert_html_tables(lines: &[String]) -> Vec { let mut out = Vec::new(); let mut buf = Vec::new();