diff --git a/docs/html-table-support.md b/docs/html-table-support.md index 3c64cc74..6e255006 100644 --- a/docs/html-table-support.md +++ b/docs/html-table-support.md @@ -16,3 +16,9 @@ rest of the document. 12 ``` + +The converter checks the first table row for `<th>` cells or for `<strong>` or +`<b>` tags inside `<td>` elements to decide whether it is a header. If no such +markers exist and the table contains multiple rows, the first row is still +treated as the header, so the Markdown output includes a separator line. This +last-resort behaviour keeps simple tables readable after conversion. diff --git a/src/html.rs b/src/html.rs index 83b347a9..61a867ee 100644 --- a/src/html.rs +++ b/src/html.rs @@ -113,6 +113,24 @@ fn contains_strong(handle: &Handle) -> bool { children.iter().any(contains_strong) } +/// Extracts cell text from a row and reports whether all cells are header cells. +fn parse_row(row: &Handle) -> (Vec, bool) { + let mut cells = Vec::new(); + let mut all_header = true; + for child in row.children.borrow().iter() { + if is_table_cell(child) { + let is_header = if is_element(child, "th") { + true + } else { + contains_strong(child) + }; + all_header &= is_header; + cells.push(node_text(child)); + } + } + (cells, all_header) +} + /// Converts a `<table>` DOM node into Markdown table lines and calls /// `reflow_table` so the columns are uniformly padded. 
fn table_node_to_markdown(table: &Handle) -> Vec { @@ -122,33 +140,23 @@ fn table_node_to_markdown(table: &Handle) -> Vec { return Vec::new(); } + let (first_cells, explicit_header) = parse_row(&row_handles[0]); + let col_count = first_cells.len(); + let fallback_header = !explicit_header && row_handles.len() > 1; + let has_header = explicit_header || fallback_header; + let mut out = Vec::new(); - let mut first_header = false; - let mut col_count = 0; - for (i, row) in row_handles.iter().enumerate() { - let mut cells = Vec::new(); - let mut all_header = true; - for child in row.children.borrow().iter() { - if is_table_cell(child) { - let is_header = if is_element(child, "th") { - true - } else { - contains_strong(child) - }; - all_header &= is_header; - cells.push(node_text(child)); - } - } - if i == 0 { - first_header = all_header; - col_count = cells.len(); - } + out.push(format!("| {} |", first_cells.join(" | "))); + for row in row_handles.iter().skip(1) { + let (cells, _) = parse_row(row); out.push(format!("| {} |", cells.join(" | "))); } - if first_header { + + if has_header { let sep: Vec = (0..col_count).map(|_| "---".to_string()).collect(); out.insert(1, format!("| {} |", sep.join(" | "))); } + crate::reflow_table(&out) } diff --git a/tests/integration.rs b/tests/integration.rs index 959685b8..c7b738ee 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -101,6 +101,36 @@ fn html_table_no_header() -> Vec { ) } +#[fixture] +fn html_table_empty_row() -> Vec { + lines_vec!( + "
", + "", + "", + "
12
", + ) +} + +#[fixture] +fn html_table_whitespace_header() -> Vec { + lines_vec!( + "", + "", + "", + "
12
", + ) +} + +#[fixture] +fn html_table_inconsistent_first_row() -> Vec { + lines_vec!( + "", + "", + "", + "
A
12
", + ) +} + #[fixture] fn html_table_empty() -> Vec { let lines = lines_vec!("
"); @@ -457,10 +487,34 @@ fn test_convert_html_table_with_colspan() { #[test] fn test_convert_html_table_no_header() { - let expected = vec!["| A | B |", "| 1 | 2 |"]; + let expected = vec!["| A | B |", "| --- | --- |", "| 1 | 2 |"]; assert_eq!(convert_html_tables(&html_table_no_header()), expected); } +#[test] +fn test_convert_html_table_empty_row() { + let expected = vec!["| 1 | 2 |", "| --- | --- |"]; + assert_eq!(convert_html_tables(&html_table_empty_row()), expected); +} + +#[test] +fn test_convert_html_table_whitespace_header() { + let expected = vec!["| --- | --- |", "| --- | --- |", "| 1 | 2 |"]; + assert_eq!( + convert_html_tables(&html_table_whitespace_header()), + expected + ); +} + +#[test] +fn test_convert_html_table_inconsistent_first_row() { + let expected = vec!["| A |", "| --- |", "| 1 | 2 |"]; + assert_eq!( + convert_html_tables(&html_table_inconsistent_first_row()), + expected + ); +} + #[test] fn test_convert_html_table_empty() { assert!(convert_html_tables(&html_table_empty()).is_empty());