diff --git a/shortcuts/doc/markdown_fix.go b/shortcuts/doc/markdown_fix.go
index f9b28dccf..1ead7a61c 100644
--- a/shortcuts/doc/markdown_fix.go
+++ b/shortcuts/doc/markdown_fix.go
@@ -6,6 +6,8 @@ package doc
import (
"regexp"
"strings"
+ "unicode"
+ "unicode/utf8"
)
// fixExportedMarkdown applies post-processing to Lark-exported Markdown to
@@ -15,24 +17,29 @@ import (
// and strips redundant ** from ATX headings. Applied only outside fenced
// code blocks, and skips inline code spans.
//
-// 2. fixSetextAmbiguity: inserts a blank line before any "---" that immediately
+// 2. normalizeNestedListIndentation: rewrites space-pair-indented nested list
+// markers to tab-indented markers. This avoids nested ordered list items
+// being flattened or interpreted as plain text/code on re-import.
+//
+// 3. fixSetextAmbiguity: inserts a blank line before any "---" that immediately
// follows a non-empty line, preventing it from being parsed as a Setext H2.
// Applied only outside fenced code blocks.
//
-// 3. fixBlockquoteHardBreaks: inserts a blank blockquote line (">") between
+// 4. fixBlockquoteHardBreaks: inserts a blank blockquote line (">") between
// consecutive blockquote content lines so create-doc preserves line breaks.
// Applied only outside fenced code blocks.
//
-// 4. fixTopLevelSoftbreaks: inserts a blank line between adjacent non-empty
+// 5. fixTopLevelSoftbreaks: inserts a blank line between adjacent non-empty
// lines at the top level and inside content containers (callout,
// quote-container, lark-td). Code fences are left untouched, and
// consecutive list items / continuations are not separated.
//
-// 5. fixCalloutEmoji: replaces named emoji aliases (e.g. emoji="warning") with
+// 6. fixCalloutEmoji: replaces named emoji aliases (e.g. emoji="warning") with
// actual Unicode emoji characters that create-doc understands. Applied only
// outside fenced code blocks.
func fixExportedMarkdown(md string) string {
md = applyOutsideCodeFences(md, fixBoldSpacing)
+ md = applyOutsideCodeFences(md, normalizeNestedListIndentation)
md = applyOutsideCodeFences(md, fixSetextAmbiguity)
md = applyOutsideCodeFences(md, fixBlockquoteHardBreaks)
md = fixTopLevelSoftbreaks(md)
@@ -106,20 +113,21 @@ func fixBlockquoteHardBreaks(md string) string {
return strings.Join(out, "\n")
}
-// fixBoldSpacing fixes two issues with bold markers exported by Lark:
+// fixBoldSpacing normalizes emphasis markers exported by Lark while preserving
+// inline code spans:
+//
+// 1. Removes leading whitespace after opening ** and * delimiters:
+// "** text**" → "**text**", "* text*" → "*text*"
//
-// 1. Trailing whitespace before closing **: "**text **" → "**text**"
-// CommonMark requires no space before a closing delimiter; otherwise the
-// ** is rendered as literal text.
+// 2. Removes trailing whitespace before closing ** and * delimiters:
+// "**text **" → "**text**", "*text *" → "*text*"
//
-// 2. Redundant bold in ATX headings: "# **text**" → "# text"
-// Headings are already bold, so the inner ** is visually redundant and
-// some renderers display the markers literally.
+// 3. Removes redundant bold around an entire ATX heading:
+// "# **text**" → "# text"
//
-// Both fixes skip inline code spans to avoid modifying literal code content.
+// The bold and italic spacing fixes only run on non-code segments so literal
+// code content is left unchanged.
var (
- boldTrailingSpaceRe = regexp.MustCompile(`(\*\*\S[^*]*?)\s+(\*\*)`)
- italicTrailingSpaceRe = regexp.MustCompile(`(\*\S[^*]*?)\s+(\*)`)
// headingBoldRe uses [^*]+ (no asterisks) to avoid mismatching headings
// that contain multiple disjoint bold spans such as "# **foo** and **bar**".
headingBoldRe = regexp.MustCompile(`(?m)^(#{1,6})\s+\*\*([^*]+)\*\*\s*$`)
@@ -182,38 +190,116 @@ func scanInlineCodeSpans(line string) [][2]int {
// fixBoldSpacingLine applies bold/italic trailing-space fixes to a single line,
// skipping content inside inline code spans to avoid corrupting literal code.
// ATX heading lines are also skipped here because headingBoldRe in fixBoldSpacing
-// handles them separately and boldTrailingSpaceRe can misfire on headings with
-// multiple disjoint bold spans (e.g. "# **foo** and **bar**").
+// handles them separately, keeping heading-only normalization isolated from the
+// inline emphasis spacing scanner below.
func fixBoldSpacingLine(line string) string {
if atxHeadingRe.MatchString(line) {
return line
}
spans := scanInlineCodeSpans(line)
if len(spans) == 0 {
- line = boldTrailingSpaceRe.ReplaceAllString(line, "$1$2")
- line = italicTrailingSpaceRe.ReplaceAllString(line, "$1$2")
- return line
+ return fixEmphasisSpacingSegment(line)
}
var sb strings.Builder
pos := 0
for _, loc := range spans {
// Process the non-code segment before this inline code span.
seg := line[pos:loc[0]]
- seg = boldTrailingSpaceRe.ReplaceAllString(seg, "$1$2")
- seg = italicTrailingSpaceRe.ReplaceAllString(seg, "$1$2")
- sb.WriteString(seg)
+ sb.WriteString(fixEmphasisSpacingSegment(seg))
// Preserve inline code span as-is.
sb.WriteString(line[loc[0]:loc[1]])
pos = loc[1]
}
// Remaining non-code segment after the last code span.
- seg := line[pos:]
- seg = boldTrailingSpaceRe.ReplaceAllString(seg, "$1$2")
- seg = italicTrailingSpaceRe.ReplaceAllString(seg, "$1$2")
- sb.WriteString(seg)
+ sb.WriteString(fixEmphasisSpacingSegment(line[pos:]))
return sb.String()
}
+// fixEmphasisSpacingSegment trims only the whitespace immediately inside simple
+// *...* and **...** spans. It deliberately ignores runs of 3+ asterisks and
+// any candidate whose payload contains another asterisk so nested emphasis-like
+// text remains untouched. When both inner sides contain whitespace, single-rune
+// payloads are preserved as literal text (for example "* x *" and "** x **").
+func fixEmphasisSpacingSegment(seg string) string {
+ if !strings.Contains(seg, "*") {
+ return seg
+ }
+
+ var sb strings.Builder
+ pos := 0
+ for pos < len(seg) {
+ openStart, openEnd, ok := nextAsteriskRun(seg, pos)
+ if !ok {
+ sb.WriteString(seg[pos:])
+ break
+ }
+
+ sb.WriteString(seg[pos:openStart])
+
+ markerLen := openEnd - openStart
+ if markerLen != 1 && markerLen != 2 {
+ sb.WriteString(seg[openStart:openEnd])
+ pos = openEnd
+ continue
+ }
+
+ closeStart, closeEnd, ok := nextAsteriskRun(seg, openEnd)
+ if !ok || closeEnd-closeStart != markerLen {
+ sb.WriteString(seg[openStart:openEnd])
+ pos = openEnd
+ continue
+ }
+
+ payload := seg[openEnd:closeStart]
+ normalized, shouldNormalize := normalizeEmphasisPayload(payload)
+ if !shouldNormalize {
+ sb.WriteString(seg[openStart:closeEnd])
+ pos = closeEnd
+ continue
+ }
+
+ marker := seg[openStart:openEnd]
+ sb.WriteString(marker)
+ sb.WriteString(normalized)
+ sb.WriteString(marker)
+ pos = closeEnd
+ }
+ return sb.String()
+}
+
+func nextAsteriskRun(s string, start int) (runStart, runEnd int, ok bool) {
+ for i := start; i < len(s); i++ {
+ if s[i] != '*' {
+ continue
+ }
+ j := i
+ for j < len(s) && s[j] == '*' {
+ j++
+ }
+ return i, j, true
+ }
+ return 0, 0, false
+}
+
+func normalizeEmphasisPayload(payload string) (string, bool) {
+ trimmedLeft := strings.TrimLeftFunc(payload, unicode.IsSpace)
+ trimmed := strings.TrimRightFunc(trimmedLeft, unicode.IsSpace)
+ if trimmed == "" {
+ return payload, false
+ }
+
+ hasLeadingSpace := len(trimmedLeft) != len(payload)
+ hasTrailingSpace := len(trimmed) != len(trimmedLeft)
+ if !hasLeadingSpace && !hasTrailingSpace {
+ return payload, true
+ }
+
+ if hasLeadingSpace && hasTrailingSpace && utf8.RuneCountInString(trimmed) == 1 {
+ return payload, false
+ }
+ return trimmed, true
+}
+
var setextRe = regexp.MustCompile(`(?m)^([^\n]+)\n(-{3,}\s*$)`)
func fixSetextAmbiguity(md string) string {
@@ -291,6 +377,44 @@ var contentContainers = [][2]string{
// indented (nested) items.
var listItemRe = regexp.MustCompile(`^[ \t]*([-*+]|\d+[.)]) `)
+// nestedListIndentRe matches nested list item markers indented with pairs of
+// spaces. We rewrite those space pairs to tabs because some downstream
+// round-trip paths treat multi-space indented ordered items as flat items or
+// literal text, while tab indentation remains nested and avoids 4-space code
+// block ambiguity.
+var nestedListIndentRe = regexp.MustCompile(`^( {2,})([-*+]|\d+[.)]) `)
+
+func normalizeNestedListIndentation(md string) string {
+ lines := strings.Split(md, "\n")
+ for i, line := range lines {
+ matches := nestedListIndentRe.FindStringSubmatch(line)
+ if len(matches) != 3 {
+ continue
+ }
+ if !hasPreviousNonBlankListItem(lines, i) {
+ continue
+ }
+ indent := matches[1]
+ if len(indent)%2 != 0 {
+ continue
+ }
+ tabs := strings.Repeat("\t", len(indent)/2)
+ lines[i] = tabs + line[len(indent):]
+ }
+ return strings.Join(lines, "\n")
+}
+
+func hasPreviousNonBlankListItem(lines []string, index int) bool {
+ for i := index - 1; i >= 0; i-- {
+ trimmed := strings.TrimSpace(lines[i])
+ if trimmed == "" {
+ return false
+ }
+ return listItemRe.MatchString(lines[i])
+ }
+ return false
+}
+
// isListItemOrContinuation returns true for lines that are part of a list:
// either a list item marker line or an indented continuation of a list item.
// This is used to prevent blank lines being inserted between tight list lines,
diff --git a/shortcuts/doc/markdown_fix_test.go b/shortcuts/doc/markdown_fix_test.go
index b47ab7785..81ac26a9a 100644
--- a/shortcuts/doc/markdown_fix_test.go
+++ b/shortcuts/doc/markdown_fix_test.go
@@ -14,6 +14,56 @@ func TestFixBoldSpacing(t *testing.T) {
input string
want string
}{
+ {
+ name: "leading space after opening bold",
+ input: "** hello**",
+ want: "**hello**",
+ },
+ {
+ name: "leading space after opening italic",
+ input: "* hello*",
+ want: "*hello*",
+ },
+ {
+ name: "leading and trailing spaces inside bold are collapsed",
+ input: "** hello **",
+ want: "**hello**",
+ },
+ {
+ name: "leading and trailing spaces inside italic are collapsed",
+ input: "* hello *",
+ want: "*hello*",
+ },
+ {
+ name: "multiple spaced italic spans on one line are each collapsed",
+ input: "* a* * b*",
+ want: "*a* *b*",
+ },
+ {
+ name: "ambiguous italic span stays literal",
+ input: "2 * x * y",
+ want: "2 * x * y",
+ },
+ {
+ name: "ambiguous bold span stays literal",
+ input: "2 ** x ** y",
+ want: "2 ** x ** y",
+ },
+ {
+ name: "single-rune italic with spaces on both sides stays literal",
+ input: "* x *",
+ want: "* x *",
+ },
+ {
+ name: "single-rune bold with spaces on both sides stays literal",
+ input: "** x **",
+ want: "** x **",
+ },
+ {
+ name: "triple-asterisk near miss stays literal",
+ input: "*** hello**",
+ want: "*** hello**",
+ },
{
name: "trailing space before closing bold",
input: "**hello **",
@@ -54,6 +104,16 @@ func TestFixBoldSpacing(t *testing.T) {
input: "**foo ** and `**bar **`",
want: "**foo** and `**bar **`",
},
+ {
+ name: "inline code with spaced italic stays literal while outside span is fixed",
+ input: "`* hello *` and * hello *",
+ want: "`* hello *` and *hello*",
+ },
+ {
+ name: "opening space inside text tag fixed",
+ input: `** Helpful - 有用性:**`,
+ want: `**Helpful - 有用性:**`,
+ },
{
name: "double-backtick inline code not modified",
input: "``**hello **`` and **world **",
@@ -222,6 +282,53 @@ func TestFixTopLevelSoftbreaks(t *testing.T) {
}
}
+func TestNormalizeNestedListIndentation(t *testing.T) {
+ tests := []struct {
+ name string
+ input string
+ want string
+ }{
+ {
+ name: "nested ordered list uses tabs instead of space pairs",
+ input: "1. parent\n 1. child\n 1. grandchild",
+ want: "1. parent\n\t1. child\n\t\t1. grandchild",
+ },
+ {
+ name: "nested mixed list markers use tabs instead of space pairs",
+ input: "- parent\n - child\n 1. grandchild",
+ want: "- parent\n\t- child\n\t\t1. grandchild",
+ },
+ {
+ name: "top-level list unchanged",
+ input: "1. parent\n2. sibling",
+ want: "1. parent\n2. sibling",
+ },
+ {
+ name: "indented top-level marker without parent list stays unchanged",
+ input: "paragraph\n\n 1. item",
+ want: "paragraph\n\n 1. item",
+ },
+ {
+ name: "blank-line-separated loose-list sibling stays unchanged",
+ input: "1. a\n\n 1. b",
+ want: "1. a\n\n 1. b",
+ },
+ {
+ name: "indented code block inside list item stays unchanged",
+ input: "- parent\n\n 1. code",
+ want: "- parent\n\n 1. code",
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ got := normalizeNestedListIndentation(tt.input)
+ if got != tt.want {
+ t.Errorf("normalizeNestedListIndentation(%q) = %q, want %q", tt.input, got, tt.want)
+ }
+ })
+ }
+}
+
func TestFixExportedMarkdown(t *testing.T) {
// End-to-end: all fixes applied together
input := "# **Title**\nparagraph one\nparagraph two\n**bold **\n> q1\n> q2\nsome text\n---"