larksuite · hanBufan · Apr 21, 2026 · coderabbitai · Apr 21, 2026 · coderabbitai
diff --git a/shortcuts/doc/markdown_fix.go b/shortcuts/doc/markdown_fix.go
@@ -13,31 +13,36 @@ import (
 // fixExportedMarkdown applies post-processing to Lark-exported Markdown to
 // improve round-trip fidelity on re-import:
 //
-//  1. fixBoldSpacing: removes trailing whitespace before closing ** / *,
+//  1. fixLarkTables: converts Feishu XML tables (<lark-table>, <lark-tr>,
+//     <lark-td>) to standard Markdown tables. Applied only outside fenced
+//     code blocks. Tables with merged cells (colspan/rowspan) are skipped.
+//
+//  2. fixBoldSpacing: removes trailing whitespace before closing ** / *,
 //     and strips redundant ** from ATX headings. Applied only outside fenced
 //     code blocks, and skips inline code spans.
 //
-//  2. normalizeNestedListIndentation: rewrites space-pair-indented nested list
+//  3. normalizeNestedListIndentation: rewrites space-pair-indented nested list
 //     markers to tab-indented markers. This avoids nested ordered list items
 //     being flattened or interpreted as plain text/code on re-import.
 //
-//  3. fixSetextAmbiguity: inserts a blank line before any "---" that immediately
+//  4. fixSetextAmbiguity: inserts a blank line before any "---" that immediately
 //     follows a non-empty line, preventing it from being parsed as a Setext H2.
 //     Applied only outside fenced code blocks.
 //
-//  4. fixBlockquoteHardBreaks: inserts a blank blockquote line (">") between
+//  5. fixBlockquoteHardBreaks: inserts a blank blockquote line (">") between
 //     consecutive blockquote content lines so create-doc preserves line breaks.
 //     Applied only outside fenced code blocks.
 //
-//  5. fixTopLevelSoftbreaks: inserts a blank line between adjacent non-empty
+//  6. fixTopLevelSoftbreaks: inserts a blank line between adjacent non-empty
 //     lines at the top level and inside content containers (callout,
 //     quote-container, lark-td). Code fences are left untouched, and
 //     consecutive list items / continuations are not separated.
 //
-//  6. fixCalloutEmoji: replaces named emoji aliases (e.g. emoji="warning") with
+//  7. fixCalloutEmoji: replaces named emoji aliases (e.g. emoji="warning") with
 //     actual Unicode emoji characters that create-doc understands. Applied only
 //     outside fenced code blocks.
 func fixExportedMarkdown(md string) string {
+	md = applyOutsideCodeFences(md, fixLarkTables)
 	md = applyOutsideCodeFences(md, fixBoldSpacing)
 	md = applyOutsideCodeFences(md, normalizeNestedListIndentation)
 	md = applyOutsideCodeFences(md, fixSetextAmbiguity)
@@ -538,3 +543,66 @@ func fixTopLevelSoftbreaks(md string) string {
 
 	return strings.Join(out, "\n")
 }
+
+// Patterns and escaper used by fixLarkTables. They are built once at package
+// initialisation rather than on every call (and, for cells, on every row).
+var (
+	larkTableBlockRe     = regexp.MustCompile(`(?s)<lark-table[^>]*>(.*?)</lark-table>`)
+	larkTableRowRe       = regexp.MustCompile(`(?s)<lark-tr[^>]*>(.*?)</lark-tr>`)
+	larkTableCellRe      = regexp.MustCompile(`(?s)<lark-td[^>]*>(.*?)</lark-td>`)
+	larkTableCellEscaper = strings.NewReplacer("\r\n", "<br/>", "\n", "<br/>", "|", `\|`)
+)
+
+// fixLarkTables converts Feishu/Lark XML tables to standard Markdown tables.
+//
+// Every <lark-table>...</lark-table> block in md is rewritten independently;
+// text outside those blocks is returned untouched. The function itself does
+// not look at code fences, so callers that must leave fenced code alone pass
+// it through applyOutsideCodeFences.
+//
+// Conversion rules:
+//   - Each <lark-tr> becomes one table row and each <lark-td> one cell.
+//   - The first row that has cells is used as the header and is followed by
+//     a "| --- |" delimiter row.
+//   - Cell text is trimmed; line breaks inside a cell (LF or CRLF) become
+//     <br/> and pipe characters are escaped as \|.
+//   - Rows with fewer cells than the widest row are padded with empty cells.
+//     GFM only recognises a table when the header row and the delimiter row
+//     have the same number of cells, so a short header would otherwise stop
+//     the whole table from rendering.
+//   - Tables mentioning colspan= or rowspan= (merged cells), and tables in
+//     which no cell is found, are returned unchanged as XML.
+func fixLarkTables(md string) string {
+	return larkTableBlockRe.ReplaceAllStringFunc(md, convertLarkTable)
+}
+
+// convertLarkTable renders a single <lark-table> block as a Markdown table,
+// or returns tableXML unchanged when the table cannot be represented.
+func convertLarkTable(tableXML string) string {
+	// Markdown tables cannot express merged cells. The check is a plain
+	// substring test over the whole block, so it also triggers when the
+	// literal text appears inside a cell; such tables simply stay as XML.
+	if strings.Contains(tableXML, "colspan=") || strings.Contains(tableXML, "rowspan=") {
+		return tableXML
+	}
+
+	rowMatches := larkTableRowRe.FindAllStringSubmatch(tableXML, -1)
+	rows := make([][]string, 0, len(rowMatches))
+	colCount := 0
+
+	for _, rowMatch := range rowMatches {
+		cellMatches := larkTableCellRe.FindAllStringSubmatch(rowMatch[1], -1)
+		if len(cellMatches) == 0 {
+			// A <lark-tr> without cells contributes nothing to the table.
+			continue
+		}
+
+		cells := make([]string, 0, len(cellMatches))
+		for _, cellMatch := range cellMatches {
+			// A table row must stay on one line, so line breaks are
+			// replaced and pipes escaped in a single pass.
+			cells = append(cells, larkTableCellEscaper.Replace(strings.TrimSpace(cellMatch[1])))
+		}
+
+		rows = append(rows, cells)
+		if len(cells) > colCount {
+			colCount = len(cells)
+		}
+	}
+
+	if len(rows) == 0 {
+		return tableXML
+	}
+
+	// One line per row plus the delimiter row that follows the header.
+	lines := make([]string, 0, len(rows)+1)
+	for i, cells := range rows {
+		// Pad ragged rows so every row, the header in particular, has
+		// exactly colCount cells.
+		for len(cells) < colCount {
+			cells = append(cells, "")
+		}
+		lines = append(lines, "| "+strings.Join(cells, " | ")+" |")
+		if i == 0 {
+			lines = append(lines, "|"+strings.Repeat(" --- |", colCount))
+		}
+	}
+
+	return strings.Join(lines, "\n")
+}
diff --git a/shortcuts/doc/markdown_fix_hardening_test.go b/shortcuts/doc/markdown_fix_hardening_test.go
@@ -70,6 +70,20 @@ func TestFixExportedMarkdownIdempotent(t *testing.T) {
 			"</quote-container>",
 			"",
 		}, "\n"),
+
+			"lark-table converted to markdown": strings.Join([]string{
+				"<lark-table>",
+				"<lark-tr>",
+				"<lark-td>Header</lark-td>",
+				"<lark-td>Value</lark-td>",
+				"</lark-tr>",
+				"<lark-tr>",
+				"<lark-td>Data</lark-td>",
+				"<lark-td>123</lark-td>",
+				"</lark-tr>",
+				"</lark-table>",
+				"",
+			}, "\n"),
 	}
 
 	for name, fixture := range fixtures {

diff --git a/shortcuts/doc/markdown_fix_test.go b/shortcuts/doc/markdown_fix_test.go
@@ -438,3 +438,174 @@ func TestFixTopLevelSoftbreaksQuoteContainer(t *testing.T) {
 		t.Errorf("fixTopLevelSoftbreaks quote-container = %q, want %q", got, want)
 	}
 }
+
+// TestFixLarkTables runs fixLarkTables through applyOutsideCodeFences on a
+// set of fixtures and compares the result with the expected Markdown. The
+// fixtures cover plain tables, empty cells, pipe escaping, multiline cells,
+// merged cells (left as XML), a table inside a code fence (left as is),
+// several tables in one document, padded cell content and tags carrying
+// attributes.
+func TestFixLarkTables(t *testing.T) {
+	type tableCase struct {
+		name, give, want string
+	}
+
+	// join assembles a fixture from its individual lines.
+	join := func(lines ...string) string {
+		return strings.Join(lines, "\n")
+	}
+
+	// Fixtures that must come back byte-for-byte unchanged.
+	mergedXML := join(
+		`<lark-table>`,
+		`<lark-tr>`,
+		`<lark-td colspan="2">merged</lark-td>`,
+		`</lark-tr>`,
+		`</lark-table>`,
+	)
+	fencedXML := "```\n<lark-table>\n<lark-tr>\n<lark-td>cell</lark-td>\n</lark-tr>\n</lark-table>\n```"
+
+	cases := []tableCase{
+		{
+			name: "simple 2x2 table",
+			give: join(
+				`<lark-table>`,
+				`<lark-tr>`,
+				`<lark-td>Header 1</lark-td>`,
+				`<lark-td>Header 2</lark-td>`,
+				`</lark-tr>`,
+				`<lark-tr>`,
+				`<lark-td>Cell 1</lark-td>`,
+				`<lark-td>Cell 2</lark-td>`,
+				`</lark-tr>`,
+				`</lark-table>`,
+			),
+			want: join(
+				`| Header 1 | Header 2 |`,
+				`| --- | --- |`,
+				`| Cell 1 | Cell 2 |`,
+			),
+		},
+		{
+			name: "table with empty cells",
+			give: join(
+				`<lark-table>`,
+				`<lark-tr>`,
+				`<lark-td>A</lark-td>`,
+				`<lark-td></lark-td>`,
+				`</lark-tr>`,
+				`<lark-tr>`,
+				`<lark-td></lark-td>`,
+				`<lark-td>B</lark-td>`,
+				`</lark-tr>`,
+				`</lark-table>`,
+			),
+			want: join(
+				`| A |  |`,
+				`| --- | --- |`,
+				`|  | B |`,
+			),
+		},
+		{
+			name: "table with pipe character escaped",
+			give: join(
+				`<lark-table>`,
+				`<lark-tr>`,
+				`<lark-td>a|b</lark-td>`,
+				`<lark-td>c|d</lark-td>`,
+				`</lark-tr>`,
+				`</lark-table>`,
+			),
+			want: join(
+				`| a\|b | c\|d |`,
+				`| --- | --- |`,
+			),
+		},
+		{
+			name: "table with multiline content",
+			give: join(
+				`<lark-table>`,
+				`<lark-tr>`,
+				`<lark-td>line1`,
+				`line2</lark-td>`,
+				`<lark-td>single</lark-td>`,
+				`</lark-tr>`,
+				`</lark-table>`,
+			),
+			want: join(
+				`| line1<br/>line2 | single |`,
+				`| --- | --- |`,
+			),
+		},
+		{
+			name: "table with merged cells not converted",
+			give: mergedXML,
+			want: mergedXML,
+		},
+		{
+			name: "table inside code block not converted",
+			give: fencedXML,
+			want: fencedXML,
+		},
+		{
+			name: "multiple tables in document",
+			give: join(
+				`# Title`,
+				``,
+				`<lark-table>`,
+				`<lark-tr>`,
+				`<lark-td>A</lark-td>`,
+				`</lark-tr>`,
+				`</lark-table>`,
+				``,
+				`More text.`,
+				``,
+				`<lark-table>`,
+				`<lark-tr>`,
+				`<lark-td>B</lark-td>`,
+				`</lark-tr>`,
+				`</lark-table>`,
+			),
+			want: join(
+				`# Title`,
+				``,
+				`| A |`,
+				`| --- |`,
+				``,
+				`More text.`,
+				``,
+				`| B |`,
+				`| --- |`,
+			),
+		},
+		{
+			name: "table with whitespace in cells",
+			give: join(
+				`<lark-table>`,
+				`<lark-tr>`,
+				`<lark-td>`,
+				`  content`,
+				`</lark-td>`,
+				`</lark-tr>`,
+				`</lark-table>`,
+			),
+			want: join(
+				`| content |`,
+				`| --- |`,
+			),
+		},
+		{
+			name: "table with attributes on tags",
+			give: join(
+				`<lark-table id="tbl1">`,
+				`<lark-tr class="row">`,
+				`<lark-td style="bold">Data</lark-td>`,
+				`</lark-tr>`,
+				`</lark-table>`,
+			),
+			want: join(
+				`| Data |`,
+				`| --- |`,
+			),
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := applyOutsideCodeFences(tc.give, fixLarkTables)
+			if got == tc.want {
+				return
+			}
+			t.Errorf("fixLarkTables(%q) = %q, want %q", tc.give, got, tc.want)
+		})
+	}
+}
+
+// TestFixLarkTablesIntegrated checks that fixExportedMarkdown runs the table
+// conversion: the output must contain the Markdown header, delimiter and data
+// rows, and must no longer contain the opening <lark-table> tag.
+func TestFixLarkTablesIntegrated(t *testing.T) {
+	doc := strings.Join([]string{
+		"# Document",
+		"",
+		"<lark-table>",
+		"<lark-tr>",
+		"<lark-td>Header</lark-td>",
+		"</lark-tr>",
+		"<lark-tr>",
+		"<lark-td>Data</lark-td>",
+		"</lark-tr>",
+		"</lark-table>",
+		"",
+		"End.",
+	}, "\n")
+
+	out := fixExportedMarkdown(doc)
+
+	// Each check names a substring, whether it has to be present in the
+	// output, and the message reported when that expectation is not met.
+	checks := []struct {
+		needle  string
+		present bool
+		message string
+	}{
+		{"| Header |", true, "expected markdown table header in output"},
+		{"| --- |", true, "expected markdown table separator in output"},
+		{"| Data |", true, "expected markdown table data row in output"},
+		{"<lark-table>", false, "expected lark-table XML to be converted"},
+	}
+	for _, c := range checks {
+		if strings.Contains(out, c.needle) != c.present {
+			t.Error(c.message)
+		}
+	}
+}