diff --git a/docs/html-table-support.md b/docs/html-table-support.md
index b3c31ec6..e5e4cf91 100644
--- a/docs/html-table-support.md
+++ b/docs/html-table-support.md
@@ -8,3 +8,4 @@ consistently.
The crate `markup5ever_rcdom` provides a minimal DOM which `html5ever` populates
and which is traversed to extract rows and cells. Only basic tables containing
`<table>`, `<tr>`, `<th>` and `<td>` elements are supported.
+`mdtablefix` detects table elements regardless of attribute usage or tag case.
diff --git a/src/lib.rs b/src/lib.rs
index e49cda51..a39b7d67 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -219,9 +219,9 @@ fn push_html_line(
out: &mut Vec<String>,
) {
html_buf.push(line.trim_end().to_string());
- *html_depth += line.matches("<table").count();
- if line.contains("</table>") {
- *html_depth = html_depth.saturating_sub(line.matches("</table>").count());
+ *html_depth += TABLE_START_RE.find_iter(line).count();
+ if TABLE_END_RE.is_match(line) {
+ *html_depth = html_depth.saturating_sub(TABLE_END_RE.find_iter(line).count());
if *html_depth == 0 {
out.extend(html_table_to_markdown(html_buf));
html_buf.clear();
@@ -256,6 +256,10 @@ static SENTINEL_RE: std::sync::LazyLock<Regex> =
std::sync::LazyLock::new(|| Regex::new(r"\|\s*\|\s*").unwrap());
static SEP_RE: std::sync::LazyLock<Regex> =
std::sync::LazyLock::new(|| Regex::new(r"^[\s|:-]+$").unwrap());
+static TABLE_START_RE: std::sync::LazyLock<Regex> =
+ std::sync::LazyLock::new(|| Regex::new(r"(?i)^<table(?:\s|>|$)").unwrap());
+static TABLE_END_RE: std::sync::LazyLock<Regex> =
+ std::sync::LazyLock::new(|| Regex::new(r"(?i)</table>").unwrap());
#[must_use]
pub fn reflow_table(lines: &[String]) -> Vec<String> {
@@ -419,7 +423,7 @@ pub fn process_stream(lines: &[String]) -> Vec<String> {
continue;
}
- if line.trim_start().starts_with(" Vec {
]
}
+#[fixture]
+fn html_table_with_attrs() -> Vec<String> {
+ vec![
+ "<table class=\"x\">".to_string(),
+ "<tr><th>A</th><th>B</th></tr>".to_string(),
+ "<tr><td>1</td><td>2</td></tr>".to_string(),
+ "</table>".to_string(),
+ ]
+}
+
+#[fixture]
+fn html_table_uppercase() -> Vec<String> {
+ vec![
+ "<TABLE>".to_string(),
+ "<tr><th>A</th><th>B</th></tr>".to_string(),
+ "<tr><td>1</td><td>2</td></tr>".to_string(),
+ "</TABLE>".to_string(),
+ ]
+}
+
+#[fixture]
+fn html_table_mixed_case() -> Vec<String> {
+ vec![
+ "<TaBlE>".to_string(),
+ "<tr><th>A</th><th>B</th></tr>".to_string(),
+ "<tr><td>1</td><td>2</td></tr>".to_string(),
+ "</TaBlE>".to_string(),
+ ]
+}
+
#[fixture]
fn multiple_tables() -> Vec<String> {
vec![
@@ -135,6 +165,24 @@ fn test_process_stream_html_table(html_table: Vec<String>) {
assert_eq!(process_stream(&html_table), expected);
}
+#[rstest]
+fn test_process_stream_html_table_with_attrs(html_table_with_attrs: Vec<String>) {
+ let expected = vec!["| A | B |", "| --- | --- |", "| 1 | 2 |"];
+ assert_eq!(process_stream(&html_table_with_attrs), expected);
+}
+
+#[rstest]
+fn test_process_stream_html_table_uppercase(html_table_uppercase: Vec<String>) {
+ let expected = vec!["| A | B |", "| --- | --- |", "| 1 | 2 |"];
+ assert_eq!(process_stream(&html_table_uppercase), expected);
+}
+
+#[rstest]
+fn test_process_stream_html_table_mixed_case(html_table_mixed_case: Vec<String>) {
+ let expected = vec!["| A | B |", "| --- | --- |", "| 1 | 2 |"];
+ assert_eq!(process_stream(&html_table_mixed_case), expected);
+}
+
#[rstest]
fn test_process_stream_multiple_tables(multiple_tables: Vec<String>) {
let expected = vec![
|