Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/html-table-support.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,9 @@ rest of the document.
<tr><td>1</td><td>2</td></tr>
</table>
```

The converter inspects the first table row to decide whether it is a header:
every cell in that row must be either a `<th>` or a `<td>` containing a
`<strong>` or `<b>` tag. If the first row does not qualify and the table
contains multiple rows, the first row is still treated as the header, so the
Markdown output includes a separator line. This fallback behaviour keeps simple
tables readable after conversion.
50 changes: 29 additions & 21 deletions src/html.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,24 @@ fn contains_strong(handle: &Handle) -> bool {
children.iter().any(contains_strong)
}

/// Collects the text of every table cell in `row` and tells the caller whether
/// the row consists solely of header cells.
///
/// A cell counts as a header cell when it is a `th` element or when
/// `contains_strong` reports true for it. Children of `row` that are not table
/// cells are ignored. A row without any cells yields an empty vector together
/// with `true`, because the flag only turns false when a non-header cell is
/// encountered.
fn parse_row(row: &Handle) -> (Vec<String>, bool) {
    let children = row.children.borrow();
    let mut texts = Vec::new();
    let mut header_only = true;
    for cell in children.iter().filter(|node| is_table_cell(node)) {
        // Evaluate the check for every cell (no short-circuit on the running
        // flag) so the helper calls happen exactly once per cell.
        let marks_header = is_element(cell, "th") || contains_strong(cell);
        header_only &= marks_header;
        texts.push(node_text(cell));
    }
    (texts, header_only)
}

/// Converts a `<table>` DOM node into Markdown table lines and calls
/// `reflow_table` so the columns are uniformly padded.
fn table_node_to_markdown(table: &Handle) -> Vec<String> {
Expand All @@ -122,33 +140,23 @@ fn table_node_to_markdown(table: &Handle) -> Vec<String> {
return Vec::new();
}

let (first_cells, explicit_header) = parse_row(&row_handles[0]);
let col_count = first_cells.len();
let fallback_header = !explicit_header && row_handles.len() > 1;
let has_header = explicit_header || fallback_header;

let mut out = Vec::new();
let mut first_header = false;
let mut col_count = 0;
for (i, row) in row_handles.iter().enumerate() {
let mut cells = Vec::new();
let mut all_header = true;
for child in row.children.borrow().iter() {
if is_table_cell(child) {
let is_header = if is_element(child, "th") {
true
} else {
contains_strong(child)
};
all_header &= is_header;
cells.push(node_text(child));
}
}
if i == 0 {
first_header = all_header;
col_count = cells.len();
}
out.push(format!("| {} |", first_cells.join(" | ")));
for row in row_handles.iter().skip(1) {
let (cells, _) = parse_row(row);
out.push(format!("| {} |", cells.join(" | ")));
}
if first_header {

if has_header {
let sep: Vec<String> = (0..col_count).map(|_| "---".to_string()).collect();
out.insert(1, format!("| {} |", sep.join(" | ")));
}

crate::reflow_table(&out)
}

Expand Down
56 changes: 55 additions & 1 deletion tests/integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,36 @@ fn html_table_no_header() -> Vec<String> {
)
}

/// Fixture: an HTML table whose first `<tr>` contains no cells at all,
/// followed by a single row with two `<td>` cells.
#[fixture]
fn html_table_empty_row() -> Vec<String> {
lines_vec!(
"<table>",
"<tr></tr>",
"<tr><td>1</td><td>2</td></tr>",
"</table>",
)
}

/// Fixture: an HTML table whose first row has two `<td>` cells that each hold
/// only a single space, followed by a row with two populated `<td>` cells.
#[fixture]
fn html_table_whitespace_header() -> Vec<String> {
lines_vec!(
"<table>",
"<tr><td> </td><td> </td></tr>",
"<tr><td>1</td><td>2</td></tr>",
"</table>",
)
}

/// Fixture: an HTML table whose first row has one `<td>` cell while the second
/// row has two, so the rows disagree on column count.
#[fixture]
fn html_table_inconsistent_first_row() -> Vec<String> {
lines_vec!(
"<table>",
"<tr><td>A</td></tr>",
"<tr><td>1</td><td>2</td></tr>",
"</table>",
)
}

#[fixture]
fn html_table_empty() -> Vec<String> {
let lines = lines_vec!("<table></table>");
Expand Down Expand Up @@ -457,10 +487,34 @@ fn test_convert_html_table_with_colspan() {

#[test]
fn test_convert_html_table_no_header() {
let expected = vec!["| A | B |", "| 1 | 2 |"];
let expected = vec!["| A | B |", "| --- | --- |", "| 1 | 2 |"];
Comment thread
leynos marked this conversation as resolved.
assert_eq!(convert_html_tables(&html_table_no_header()), expected);
}

/// Converting the `html_table_empty_row` fixture must produce the data row
/// followed by a separator line.
#[test]
fn test_convert_html_table_empty_row() {
    let input = html_table_empty_row();
    let actual = convert_html_tables(&input);
    assert_eq!(actual, vec!["| 1 | 2 |", "| --- | --- |"]);
}

/// Converting the `html_table_whitespace_header` fixture must produce the
/// three lines pinned below.
#[test]
fn test_convert_html_table_whitespace_header() {
    let input = html_table_whitespace_header();
    let actual = convert_html_tables(&input);
    assert_eq!(
        actual,
        vec!["| --- | --- |", "| --- | --- |", "| 1 | 2 |"]
    );
}

/// Converting the `html_table_inconsistent_first_row` fixture must keep the
/// single-cell first row as the header and size the separator to that row.
#[test]
fn test_convert_html_table_inconsistent_first_row() {
    let input = html_table_inconsistent_first_row();
    let actual = convert_html_tables(&input);
    assert_eq!(actual, vec!["| A |", "| --- |", "| 1 | 2 |"]);
}

#[test]
fn test_convert_html_table_empty() {
assert!(convert_html_tables(&html_table_empty()).is_empty());
Expand Down