From 62df65d61312bcb47fb20a472103bcfc1e062012 Mon Sep 17 00:00:00 2001 From: Leynos Date: Sat, 14 Jun 2025 04:34:49 +0100 Subject: [PATCH 1/2] Add docstrings to html module --- src/html.rs | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/html.rs b/src/html.rs index f618ef5d..86df6176 100644 --- a/src/html.rs +++ b/src/html.rs @@ -1,3 +1,11 @@ +//! Utilities for converting HTML tables embedded in Markdown into +//! Markdown table syntax. +//! +//! The conversion is intentionally simple: only `<table>`, `<tr>`, +//! `<th>`, and `<td>` tags are recognised. Attributes and tag casing +//! are ignored. The resulting Markdown lines are passed to +//! `reflow_table` to ensure consistent column widths. + use html5ever::driver::ParseOpts; use html5ever::{parse_document, tendril::TendrilSink}; use markup5ever_rcdom::{Handle, NodeData, RcDom}; @@ -10,6 +18,8 @@ static TABLE_START_RE: LazyLock = LazyLock::new(|| Regex::new(r"(?i)^|$)").unwrap()); static TABLE_END_RE: LazyLock = LazyLock::new(|| Regex::new(r"(?i)
").unwrap()); +/// Extracts the text content of a DOM node, collapsing consecutive +/// whitespace to single spaces. fn node_text(handle: &Handle) -> String { let mut out = String::new(); let mut last_space = false; @@ -17,6 +27,8 @@ fn node_text(handle: &Handle) -> String { out.trim().to_string() } +/// Recursively appends text nodes from `handle` to `out`, tracking whether the +/// previous character was whitespace. fn collect_text(handle: &Handle, out: &mut String, last_space: &mut bool) { match &handle.data { NodeData::Text { contents } => { @@ -55,6 +67,7 @@ fn collect_text(handle: &Handle, out: &mut String, last_space: &mut bool) { } } +/// Walks the DOM tree collecting `<table>` nodes under `handle`. fn collect_tables(handle: &Handle, tables: &mut Vec) { if let NodeData::Element { name, .. } = &handle.data { if name.local.as_ref() == "table" { @@ -66,6 +79,7 @@ fn collect_tables(handle: &Handle, tables: &mut Vec) { } } +/// Collects all `<tr>` nodes beneath `handle`. fn collect_rows(handle: &Handle, rows: &mut Vec) { if let NodeData::Element { name, .. } = &handle.data { if name.local.as_ref() == "tr" { @@ -77,6 +91,7 @@ fn collect_rows(handle: &Handle, rows: &mut Vec) { } } +/// Converts a `<table>` DOM node into Markdown table lines. fn table_node_to_markdown(table: &Handle) -> Vec { let mut row_handles = Vec::new(); collect_rows(table, &mut row_handles); @@ -111,6 +126,7 @@ fn table_node_to_markdown(table: &Handle) -> Vec { crate::reflow_table(&out) } +/// Parses HTML table markup and returns the equivalent Markdown lines. fn table_lines_to_markdown(lines: &[String]) -> Vec { let indent: String = lines .first() @@ -139,6 +155,8 @@ fn table_lines_to_markdown(lines: &[String]) -> Vec { out } +/// Buffers a single line of HTML, updating nesting depth and emitting completed +/// tables when an end tag is encountered. fn push_html_line( line: &str, buf: &mut Vec, @@ -158,6 +176,7 @@ fn push_html_line( } } +/// Converts any HTML tables in `lines` to Markdown syntax. pub(crate) fn html_table_to_markdown(lines: &[String]) -> Vec { let mut out = Vec::new(); let mut buf = Vec::new(); @@ -187,6 +206,11 @@ pub(crate) fn html_table_to_markdown(lines: &[String]) -> Vec { out } +/// Processes Markdown lines and converts embedded HTML tables to Markdown. +/// +/// Fenced code blocks are left untouched, allowing raw HTML examples to be +/// documented without modification. +#[must_use] pub fn convert_html_tables(lines: &[String]) -> Vec { let mut out = Vec::new(); let mut buf = Vec::new(); From b47e2ccd31b1e213f17d346cd2d6c12a06ba931a Mon Sep 17 00:00:00 2001 From: Leynos Date: Sat, 14 Jun 2025 04:56:05 +0100 Subject: [PATCH 2/2] Refine HTML docstrings --- src/html.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/html.rs b/src/html.rs index 86df6176..b491d7ca 100644 --- a/src/html.rs +++ b/src/html.rs @@ -28,7 +28,7 @@ fn node_text(handle: &Handle) -> String { } /// Recursively appends text nodes from `handle` to `out`, tracking whether the -/// previous character was whitespace. +/// previous output was whitespace. 
fn collect_text(handle: &Handle, out: &mut String, last_space: &mut bool) { match &handle.data { NodeData::Text { contents } => { @@ -91,7 +91,8 @@ fn collect_rows(handle: &Handle, rows: &mut Vec) { } } -/// Converts a `<table>` DOM node into Markdown table lines. +/// Converts a `<table>` DOM node into Markdown table lines and calls +/// `reflow_table` so the columns are uniformly padded. fn table_node_to_markdown(table: &Handle) -> Vec { let mut row_handles = Vec::new(); collect_rows(table, &mut row_handles); @@ -127,6 +128,8 @@ fn table_node_to_markdown(table: &Handle) -> Vec { } /// Parses HTML table markup and returns the equivalent Markdown lines. +/// +/// If no `<table>` elements are present, the input is returned unchanged. fn table_lines_to_markdown(lines: &[String]) -> Vec { let indent: String = lines .first()