` or
+`<strong>` tags inside `<td>` elements to decide whether it is a header. If no such
+markers exist and the table contains multiple rows, the first row is still
+treated as the header, so the Markdown output includes a separator line. This
+last-resort behaviour keeps simple tables readable after conversion.
diff --git a/src/html.rs b/src/html.rs
index 83b347a9..61a867ee 100644
--- a/src/html.rs
+++ b/src/html.rs
@@ -113,6 +113,24 @@ fn contains_strong(handle: &Handle) -> bool {
children.iter().any(contains_strong)
}
+/// Extracts cell text from a row and reports whether all cells are header cells.
+fn parse_row(row: &Handle) -> (Vec, bool) {
+ let mut cells = Vec::new();
+ let mut all_header = true;
+ for child in row.children.borrow().iter() {
+ if is_table_cell(child) {
+ let is_header = if is_element(child, "th") {
+ true
+ } else {
+ contains_strong(child)
+ };
+ all_header &= is_header;
+ cells.push(node_text(child));
+ }
+ }
+ (cells, all_header)
+}
+
/// Converts a `<table>` DOM node into Markdown table lines and calls
/// `reflow_table` so the columns are uniformly padded.
fn table_node_to_markdown(table: &Handle) -> Vec {
@@ -122,33 +140,23 @@ fn table_node_to_markdown(table: &Handle) -> Vec {
return Vec::new();
}
+ let (first_cells, explicit_header) = parse_row(&row_handles[0]);
+ let col_count = first_cells.len();
+ let fallback_header = !explicit_header && row_handles.len() > 1;
+ let has_header = explicit_header || fallback_header;
+
let mut out = Vec::new();
- let mut first_header = false;
- let mut col_count = 0;
- for (i, row) in row_handles.iter().enumerate() {
- let mut cells = Vec::new();
- let mut all_header = true;
- for child in row.children.borrow().iter() {
- if is_table_cell(child) {
- let is_header = if is_element(child, "th") {
- true
- } else {
- contains_strong(child)
- };
- all_header &= is_header;
- cells.push(node_text(child));
- }
- }
- if i == 0 {
- first_header = all_header;
- col_count = cells.len();
- }
+ out.push(format!("| {} |", first_cells.join(" | ")));
+ for row in row_handles.iter().skip(1) {
+ let (cells, _) = parse_row(row);
out.push(format!("| {} |", cells.join(" | ")));
}
- if first_header {
+
+ if has_header {
let sep: Vec = (0..col_count).map(|_| "---".to_string()).collect();
out.insert(1, format!("| {} |", sep.join(" | ")));
}
+
crate::reflow_table(&out)
}
diff --git a/tests/integration.rs b/tests/integration.rs
index 959685b8..c7b738ee 100644
--- a/tests/integration.rs
+++ b/tests/integration.rs
@@ -101,6 +101,36 @@ fn html_table_no_header() -> Vec {
)
}
+#[fixture]
+fn html_table_empty_row() -> Vec {
+ lines_vec!(
+ "",
+ )
+}
+
+#[fixture]
+fn html_table_whitespace_header() -> Vec {
+ lines_vec!(
+ "",
+ )
+}
+
+#[fixture]
+fn html_table_inconsistent_first_row() -> Vec {
+ lines_vec!(
+ "",
+ )
+}
+
#[fixture]
fn html_table_empty() -> Vec {
let lines = lines_vec!("");
@@ -457,10 +487,34 @@ fn test_convert_html_table_with_colspan() {
#[test]
fn test_convert_html_table_no_header() {
- let expected = vec!["| A | B |", "| 1 | 2 |"];
+ let expected = vec!["| A | B |", "| --- | --- |", "| 1 | 2 |"];
assert_eq!(convert_html_tables(&html_table_no_header()), expected);
}
+#[test]
+fn test_convert_html_table_empty_row() {
+ let expected = vec!["| 1 | 2 |", "| --- | --- |"];
+ assert_eq!(convert_html_tables(&html_table_empty_row()), expected);
+}
+
+#[test]
+fn test_convert_html_table_whitespace_header() {
+ let expected = vec!["| --- | --- |", "| --- | --- |", "| 1 | 2 |"];
+ assert_eq!(
+ convert_html_tables(&html_table_whitespace_header()),
+ expected
+ );
+}
+
+#[test]
+fn test_convert_html_table_inconsistent_first_row() {
+ let expected = vec!["| A |", "| --- |", "| 1 | 2 |"];
+ assert_eq!(
+ convert_html_tables(&html_table_inconsistent_first_row()),
+ expected
+ );
+}
+
#[test]
fn test_convert_html_table_empty() {
assert!(convert_html_tables(&html_table_empty()).is_empty());
|