Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/html-table-support.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ consistently.
The crate `markup5ever_rcdom` provides a minimal DOM which `html5ever` populates
and which is traversed to extract rows and cells. Only basic tables containing
`<tr>`, `<th>` and `<td>` elements are supported.
`mdtablefix` detects table elements regardless of attribute usage or tag case.
12 changes: 8 additions & 4 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -219,9 +219,9 @@
out: &mut Vec<String>,
) {
html_buf.push(line.trim_end().to_string());
*html_depth += line.matches("<table").count();
if line.contains("</table>") {
*html_depth = html_depth.saturating_sub(line.matches("</table>").count());
*html_depth += TABLE_START_RE.find_iter(line).count();
if TABLE_END_RE.is_match(line) {
*html_depth = html_depth.saturating_sub(TABLE_END_RE.find_iter(line).count());
if *html_depth == 0 {
out.extend(html_table_to_markdown(html_buf));
html_buf.clear();
Expand Down Expand Up @@ -256,6 +256,10 @@
std::sync::LazyLock::new(|| Regex::new(r"\|\s*\|\s*").unwrap());
/// Matches a string made up entirely of one or more whitespace, `|`, `:`
/// or `-` characters.
// NOTE(review): presumably used to recognise Markdown separator rows — confirm at the call sites.
static SEP_RE: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| {
    Regex::new(r"^[\s|:-]+$").unwrap()
});
/// Case-insensitive pattern for an opening `<table` tag located at the very
/// start of the input and followed by whitespace, `>`, or the end of input.
// NOTE(review): because the pattern is anchored with `^`, `find_iter` can
// yield at most one match, and only when the haystack begins with the tag.
// The depth counter calls `TABLE_START_RE.find_iter(line).count()` on the
// untrimmed line, so indented or mid-line `<table` tags (e.g. a nested table
// inside a `<td>`) are not counted, unlike the `line.matches("<table")` call
// it replaced. Consider an unanchored pattern for counting — confirm intent.
static TABLE_START_RE: std::sync::LazyLock<Regex> =

Check warning on line 259 in src/lib.rs

View check run for this annotation

Codecov / codecov/patch

src/lib.rs#L259

Added line #L259 was not covered by tests
std::sync::LazyLock::new(|| Regex::new(r"(?i)^<table(?:\s|>|$)").unwrap());
/// Case-insensitive pattern for a closing `</table>` tag anywhere in the
/// input; unanchored, so every occurrence on a line can be found.
static TABLE_END_RE: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| {
    Regex::new(r"(?i)</table>").unwrap()
});

#[must_use]
pub fn reflow_table(lines: &[String]) -> Vec<String> {
Expand Down Expand Up @@ -419,7 +423,7 @@
continue;
}

if line.trim_start().starts_with("<table") {
if TABLE_START_RE.is_match(line.trim_start()) {
if !buf.is_empty() {
if in_table {
out.extend(reflow_table(&buf));
Expand Down
48 changes: 48 additions & 0 deletions tests/integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,36 @@ fn html_table() -> Vec<String> {
]
}

#[fixture]
/// Lines of a two-row HTML table whose opening tag carries a `class`
/// attribute.
fn html_table_with_attrs() -> Vec<String> {
    let rows = [
        "<table class=\"x\">",
        "<tr><th>A</th><th>B</th></tr>",
        "<tr><td>1</td><td>2</td></tr>",
        "</table>",
    ];
    rows.iter().map(|&row| String::from(row)).collect()
}

#[fixture]
/// Lines of a two-row HTML table whose `TABLE` tags are written in upper
/// case while the row and cell tags stay lower case.
fn html_table_uppercase() -> Vec<String> {
    let rows = [
        "<TABLE>",
        "<tr><th>A</th><th>B</th></tr>",
        "<tr><td>1</td><td>2</td></tr>",
        "</TABLE>",
    ];
    rows.iter().map(|&row| String::from(row)).collect()
}

#[fixture]
/// Lines of a two-row HTML table whose `TaBlE` tags mix upper and lower
/// case letters.
fn html_table_mixed_case() -> Vec<String> {
    let rows = [
        "<TaBlE>",
        "<tr><th>A</th><th>B</th></tr>",
        "<tr><td>1</td><td>2</td></tr>",
        "</TaBlE>",
    ];
    rows.iter().map(|&row| String::from(row)).collect()
}

#[fixture]
fn multiple_tables() -> Vec<String> {
vec![
Expand Down Expand Up @@ -135,6 +165,24 @@ fn test_process_stream_html_table(html_table: Vec<String>) {
assert_eq!(process_stream(&html_table), expected);
}

/// A table whose opening tag has attributes must be rendered by
/// `process_stream` as the three-line Markdown table.
#[rstest]
fn test_process_stream_html_table_with_attrs(html_table_with_attrs: Vec<String>) {
    let rendered = process_stream(&html_table_with_attrs);
    assert_eq!(rendered, vec!["| A | B |", "| --- | --- |", "| 1 | 2 |"]);
}

/// A table written with upper-case `TABLE` tags must be rendered by
/// `process_stream` as the three-line Markdown table.
#[rstest]
fn test_process_stream_html_table_uppercase(html_table_uppercase: Vec<String>) {
    let rendered = process_stream(&html_table_uppercase);
    assert_eq!(rendered, vec!["| A | B |", "| --- | --- |", "| 1 | 2 |"]);
}

/// A table written with mixed-case `TaBlE` tags must be rendered by
/// `process_stream` as the three-line Markdown table.
#[rstest]
fn test_process_stream_html_table_mixed_case(html_table_mixed_case: Vec<String>) {
    let rendered = process_stream(&html_table_mixed_case);
    assert_eq!(rendered, vec!["| A | B |", "| --- | --- |", "| 1 | 2 |"]);
}

#[rstest]
fn test_process_stream_multiple_tables(multiple_tables: Vec<String>) {
let expected = vec![
Expand Down
Loading