`,
+//! `| `, and ` | ` tags are recognised. Attributes and tag casing
+//! are ignored. The resulting Markdown lines are passed to
+//! `reflow_table` to ensure consistent column widths.
+
use html5ever::driver::ParseOpts;
use html5ever::{parse_document, tendril::TendrilSink};
use markup5ever_rcdom::{Handle, NodeData, RcDom};
@@ -10,6 +18,8 @@ static TABLE_START_RE: LazyLock =
LazyLock::new(|| Regex::new(r"(?i)^|$)").unwrap());
static TABLE_END_RE: LazyLock = LazyLock::new(|| Regex::new(r"(?i) ").unwrap());
+/// Extracts the text content of a DOM node, collapsing consecutive
+/// whitespace to single spaces.
fn node_text(handle: &Handle) -> String {
let mut out = String::new();
let mut last_space = false;
@@ -17,6 +27,8 @@ fn node_text(handle: &Handle) -> String {
out.trim().to_string()
}
+/// Recursively appends text nodes from `handle` to `out`, tracking whether the
+/// previous output was whitespace.
fn collect_text(handle: &Handle, out: &mut String, last_space: &mut bool) {
match &handle.data {
NodeData::Text { contents } => {
@@ -55,6 +67,7 @@ fn collect_text(handle: &Handle, out: &mut String, last_space: &mut bool) {
}
}
+/// Walks the DOM tree collecting `` nodes under `handle`.
fn collect_tables(handle: &Handle, tables: &mut Vec) {
if let NodeData::Element { name, .. } = &handle.data {
if name.local.as_ref() == "table" {
@@ -66,6 +79,7 @@ fn collect_tables(handle: &Handle, tables: &mut Vec) {
}
}
+/// Collects all `` nodes beneath `handle`.
fn collect_rows(handle: &Handle, rows: &mut Vec) {
if let NodeData::Element { name, .. } = &handle.data {
if name.local.as_ref() == "tr" {
@@ -77,6 +91,8 @@ fn collect_rows(handle: &Handle, rows: &mut Vec) {
}
}
+/// Converts a `` DOM node into Markdown table lines and calls
+/// `reflow_table` so the columns are uniformly padded.
fn table_node_to_markdown(table: &Handle) -> Vec {
let mut row_handles = Vec::new();
collect_rows(table, &mut row_handles);
@@ -111,6 +127,9 @@ fn table_node_to_markdown(table: &Handle) -> Vec {
crate::reflow_table(&out)
}
+/// Parses HTML table markup and returns the equivalent Markdown lines.
+///
+/// If no `` elements are present, the input is returned unchanged.
fn table_lines_to_markdown(lines: &[String]) -> Vec {
let indent: String = lines
.first()
@@ -139,6 +158,8 @@ fn table_lines_to_markdown(lines: &[String]) -> Vec {
out
}
+/// Buffers a single line of HTML, updating nesting depth and emitting completed
+/// tables when an end tag is encountered.
fn push_html_line(
line: &str,
buf: &mut Vec,
@@ -158,6 +179,7 @@ fn push_html_line(
}
}
+/// Converts any HTML tables in `lines` to Markdown syntax.
pub(crate) fn html_table_to_markdown(lines: &[String]) -> Vec {
let mut out = Vec::new();
let mut buf = Vec::new();
@@ -187,6 +209,11 @@ pub(crate) fn html_table_to_markdown(lines: &[String]) -> Vec {
out
}
+/// Processes Markdown lines and converts embedded HTML tables to Markdown.
+///
+/// Fenced code blocks are left untouched, allowing raw HTML examples to be
+/// documented without modification.
+#[must_use]
pub fn convert_html_tables(lines: &[String]) -> Vec {
let mut out = Vec::new();
let mut buf = Vec::new();
|