From 2c8b998f58f597906dc6cd8bbbea5b953c5770e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=AB=98=E6=98=A5=E6=99=96?= <18220699480@163.com> Date: Tue, 14 Apr 2026 18:17:31 +0800 Subject: [PATCH 1/4] fix(doc): preserve round-trip formatting in fetch output - trim leading spaces inside bold and italic emphasis exported by docs +fetch - normalize nested list indentation to avoid flattening and literal text on re-import - add regression tests for emphasis spacing and nested list indentation --- shortcuts/doc/markdown_fix.go | 45 +++++++++++++++++++++++++--- shortcuts/doc/markdown_fix_test.go | 47 ++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 4 deletions(-) diff --git a/shortcuts/doc/markdown_fix.go b/shortcuts/doc/markdown_fix.go index f9b28dccf..bc7d11b56 100644 --- a/shortcuts/doc/markdown_fix.go +++ b/shortcuts/doc/markdown_fix.go @@ -15,24 +15,29 @@ import ( // and strips redundant ** from ATX headings. Applied only outside fenced // code blocks, and skips inline code spans. // -// 2. fixSetextAmbiguity: inserts a blank line before any "---" that immediately +// 2. normalizeNestedListIndentation: rewrites space-pair-indented nested list +// markers to tab-indented markers. This avoids nested ordered list items +// being flattened or interpreted as plain text/code on re-import. +// +// 3. fixSetextAmbiguity: inserts a blank line before any "---" that immediately // follows a non-empty line, preventing it from being parsed as a Setext H2. // Applied only outside fenced code blocks. // -// 3. fixBlockquoteHardBreaks: inserts a blank blockquote line (">") between +// 4. fixBlockquoteHardBreaks: inserts a blank blockquote line (">") between // consecutive blockquote content lines so create-doc preserves line breaks. // Applied only outside fenced code blocks. // -// 4. fixTopLevelSoftbreaks: inserts a blank line between adjacent non-empty +// 5. fixTopLevelSoftbreaks: inserts a blank line between adjacent non-empty // lines at the top level and inside content containers (callout, // quote-container, lark-td). Code fences are left untouched, and // consecutive list items / continuations are not separated. // -// 5. fixCalloutEmoji: replaces named emoji aliases (e.g. emoji="warning") with +// 6. fixCalloutEmoji: replaces named emoji aliases (e.g. emoji="warning") with // actual Unicode emoji characters that create-doc understands. Applied only // outside fenced code blocks. func fixExportedMarkdown(md string) string { md = applyOutsideCodeFences(md, fixBoldSpacing) + md = applyOutsideCodeFences(md, normalizeNestedListIndentation) md = applyOutsideCodeFences(md, fixSetextAmbiguity) md = applyOutsideCodeFences(md, fixBlockquoteHardBreaks) md = fixTopLevelSoftbreaks(md) @@ -118,7 +123,9 @@ func fixBlockquoteHardBreaks(md string) string { // // Both fixes skip inline code spans to avoid modifying literal code content. var ( + boldLeadingSpaceRe = regexp.MustCompile(`(\*\*)\s+([^*\n].*?)(\*\*)`) boldTrailingSpaceRe = regexp.MustCompile(`(\*\*\S[^*]*?)\s+(\*\*)`) + italicLeadingSpaceRe = regexp.MustCompile(`(\*)\s+([^*\n].*?)(\*)`) italicTrailingSpaceRe = regexp.MustCompile(`(\*\S[^*]*?)\s+(\*)`) // headingBoldRe uses [^*]+ (no asterisks) to avoid mismatching headings // that contain multiple disjoint bold spans such as "# **foo** and **bar**". @@ -190,7 +197,9 @@ func fixBoldSpacingLine(line string) string { } spans := scanInlineCodeSpans(line) if len(spans) == 0 { + line = boldLeadingSpaceRe.ReplaceAllString(line, "$1$2$3") line = boldTrailingSpaceRe.ReplaceAllString(line, "$1$2") + line = italicLeadingSpaceRe.ReplaceAllString(line, "$1$2$3") line = italicTrailingSpaceRe.ReplaceAllString(line, "$1$2") return line } @@ -199,7 +208,9 @@ func fixBoldSpacingLine(line string) string { for _, loc := range spans { // Process the non-code segment before this inline code span. seg := line[pos:loc[0]] + seg = boldLeadingSpaceRe.ReplaceAllString(seg, "$1$2$3") seg = boldTrailingSpaceRe.ReplaceAllString(seg, "$1$2") + seg = italicLeadingSpaceRe.ReplaceAllString(seg, "$1$2$3") seg = italicTrailingSpaceRe.ReplaceAllString(seg, "$1$2") sb.WriteString(seg) // Preserve inline code span as-is. @@ -208,7 +219,9 @@ func fixBoldSpacingLine(line string) string { } // Remaining non-code segment after the last code span. seg := line[pos:] + seg = boldLeadingSpaceRe.ReplaceAllString(seg, "$1$2$3") seg = boldTrailingSpaceRe.ReplaceAllString(seg, "$1$2") + seg = italicLeadingSpaceRe.ReplaceAllString(seg, "$1$2$3") seg = italicTrailingSpaceRe.ReplaceAllString(seg, "$1$2") sb.WriteString(seg) return sb.String() @@ -291,6 +304,30 @@ var contentContainers = [][2]string{ // indented (nested) items. var listItemRe = regexp.MustCompile(`^[ \t]*([-*+]|\d+[.)]) `) +// nestedListIndentRe matches nested list item markers indented with pairs of +// spaces. We rewrite those space pairs to tabs because some downstream +// round-trip paths treat multi-space indented ordered items as flat items or +// literal text, while tab indentation remains nested and avoids 4-space code +// block ambiguity. +var nestedListIndentRe = regexp.MustCompile(`^( {2,})([-*+]|\d+[.)]) `) + +func normalizeNestedListIndentation(md string) string { + lines := strings.Split(md, "\n") + for i, line := range lines { + matches := nestedListIndentRe.FindStringSubmatch(line) + if len(matches) != 3 { + continue + } + indent := matches[1] + if len(indent)%2 != 0 { + continue + } + tabs := strings.Repeat("\t", len(indent)/2) + lines[i] = tabs + line[len(indent):] + } + return strings.Join(lines, "\n") +} + // isListItemOrContinuation returns true for lines that are part of a list: // either a list item marker line or an indented continuation of a list item. // This is used to prevent blank lines being inserted between tight list lines, diff --git a/shortcuts/doc/markdown_fix_test.go b/shortcuts/doc/markdown_fix_test.go index b47ab7785..2c8b9f21f 100644 --- a/shortcuts/doc/markdown_fix_test.go +++ b/shortcuts/doc/markdown_fix_test.go @@ -14,6 +14,16 @@ func TestFixBoldSpacing(t *testing.T) { input string want string }{ + { + name: "leading space after opening bold", + input: "** hello**", + want: "**hello**", + }, + { + name: "leading space after opening italic", + input: "* hello*", + want: "*hello*", + }, { name: "trailing space before closing bold", input: "**hello **", @@ -54,6 +64,11 @@ func TestFixBoldSpacing(t *testing.T) { input: "**foo ** and `**bar **`", want: "**foo** and `**bar **`", }, + { + name: "opening space inside text tag fixed", + input: `** Helpful - 有用性:**`, + want: `**Helpful - 有用性:**`, + }, { name: "double-backtick inline code not modified", input: "``**hello **`` and **world **", @@ -222,6 +237,38 @@ func TestFixTopLevelSoftbreaks(t *testing.T) { } } +func TestNormalizeNestedListIndentation(t *testing.T) { + tests := []struct { + name string + input string + want string + }{ + { + name: "nested ordered list uses tabs instead of space pairs", + input: "1. parent\n 1. child\n 1. grandchild", + want: "1. parent\n\t1. child\n\t\t1. grandchild", + }, + { + name: "nested mixed list markers use tabs instead of space pairs", + input: "- parent\n - child\n 1. grandchild", + want: "- parent\n\t- child\n\t\t1. grandchild", + }, + { + name: "top-level list unchanged", + input: "1. parent\n2. sibling", + want: "1. parent\n2. sibling", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := normalizeNestedListIndentation(tt.input) + if got != tt.want { + t.Errorf("normalizeNestedListIndentation(%q) = %q, want %q", tt.input, got, tt.want) + } + }) + } +} + func TestFixExportedMarkdown(t *testing.T) { // End-to-end: all fixes applied together input := "# **Title**\nparagraph one\nparagraph two\n**bold **\n> q1\n> q2\nsome text\n---" From f0ce39350f25138c3a30f99e7d26dc89850e1202 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=AB=98=E6=98=A5=E6=99=96?= <18220699480@163.com> Date: Tue, 14 Apr 2026 18:34:02 +0800 Subject: [PATCH 2/4] fix(doc): avoid false positives in markdown spacing fixes - keep literal * x * and ** x ** text unchanged - only normalize indented nested list markers when a parent list item exists - add regression coverage for both CodeRabbit findings --- shortcuts/doc/markdown_fix.go | 32 +++++++++++++++++++++--------- shortcuts/doc/markdown_fix_test.go | 15 ++++++++++++++ 2 files changed, 38 insertions(+), 9 deletions(-) diff --git a/shortcuts/doc/markdown_fix.go b/shortcuts/doc/markdown_fix.go index bc7d11b56..3aca4988f 100644 --- a/shortcuts/doc/markdown_fix.go +++ b/shortcuts/doc/markdown_fix.go @@ -123,10 +123,10 @@ func fixBlockquoteHardBreaks(md string) string { // // Both fixes skip inline code spans to avoid modifying literal code content. var ( - boldLeadingSpaceRe = regexp.MustCompile(`(\*\*)\s+([^*\n].*?)(\*\*)`) + boldLeadingSpaceRe = regexp.MustCompile(`(\*\*)\s+([^*\n](?:[^*\n]*[^*\s\n])?)(\*\*)`) boldTrailingSpaceRe = regexp.MustCompile(`(\*\*\S[^*]*?)\s+(\*\*)`) - italicLeadingSpaceRe = regexp.MustCompile(`(\*)\s+([^*\n].*?)(\*)`) - italicTrailingSpaceRe = regexp.MustCompile(`(\*\S[^*]*?)\s+(\*)`) + italicLeadingSpaceRe = regexp.MustCompile(`(^|[^*])(\*)\s+([^*\n](?:[^*\n]*[^*\s\n])?)(\*)([^*]|$)`) + italicTrailingSpaceRe = regexp.MustCompile(`(^|[^*])(\*\S[^*]*?)\s+(\*)([^*]|$)`) // headingBoldRe uses [^*]+ (no asterisks) to avoid mismatching headings // that contain multiple disjoint bold spans such as "# **foo** and **bar**". headingBoldRe = regexp.MustCompile(`(?m)^(#{1,6})\s+\*\*([^*]+)\*\*\s*$`) @@ -199,8 +199,8 @@ func fixBoldSpacingLine(line string) string { if len(spans) == 0 { line = boldLeadingSpaceRe.ReplaceAllString(line, "$1$2$3") line = boldTrailingSpaceRe.ReplaceAllString(line, "$1$2") - line = italicLeadingSpaceRe.ReplaceAllString(line, "$1$2$3") - line = italicTrailingSpaceRe.ReplaceAllString(line, "$1$2") + line = italicLeadingSpaceRe.ReplaceAllString(line, "$1$2$3$4$5") + line = italicTrailingSpaceRe.ReplaceAllString(line, "$1$2$3$4") return line } var sb strings.Builder @@ -210,8 +210,8 @@ func fixBoldSpacingLine(line string) string { seg := line[pos:loc[0]] seg = boldLeadingSpaceRe.ReplaceAllString(seg, "$1$2$3") seg = boldTrailingSpaceRe.ReplaceAllString(seg, "$1$2") - seg = italicLeadingSpaceRe.ReplaceAllString(seg, "$1$2$3") - seg = italicTrailingSpaceRe.ReplaceAllString(seg, "$1$2") + seg = italicLeadingSpaceRe.ReplaceAllString(seg, "$1$2$3$4$5") + seg = italicTrailingSpaceRe.ReplaceAllString(seg, "$1$2$3$4") sb.WriteString(seg) // Preserve inline code span as-is. sb.WriteString(line[loc[0]:loc[1]]) @@ -221,8 +221,8 @@ func fixBoldSpacingLine(line string) string { seg := line[pos:] seg = boldLeadingSpaceRe.ReplaceAllString(seg, "$1$2$3") seg = boldTrailingSpaceRe.ReplaceAllString(seg, "$1$2") - seg = italicLeadingSpaceRe.ReplaceAllString(seg, "$1$2$3") - seg = italicTrailingSpaceRe.ReplaceAllString(seg, "$1$2") + seg = italicLeadingSpaceRe.ReplaceAllString(seg, "$1$2$3$4$5") + seg = italicTrailingSpaceRe.ReplaceAllString(seg, "$1$2$3$4") sb.WriteString(seg) return sb.String() } @@ -318,6 +318,9 @@ func normalizeNestedListIndentation(md string) string { if len(matches) != 3 { continue } + if !hasPreviousNonBlankListItem(lines, i) { + continue + } indent := matches[1] if len(indent)%2 != 0 { continue @@ -328,6 +331,17 @@ func normalizeNestedListIndentation(md string) string { return strings.Join(lines, "\n") } +func hasPreviousNonBlankListItem(lines []string, index int) bool { + for i := index - 1; i >= 0; i-- { + trimmed := strings.TrimSpace(lines[i]) + if trimmed == "" { + continue + } + return listItemRe.MatchString(lines[i]) + } + return false +} + // isListItemOrContinuation returns true for lines that are part of a list: // either a list item marker line or an indented continuation of a list item. // This is used to prevent blank lines being inserted between tight list lines, diff --git a/shortcuts/doc/markdown_fix_test.go b/shortcuts/doc/markdown_fix_test.go index 2c8b9f21f..720bd30b5 100644 --- a/shortcuts/doc/markdown_fix_test.go +++ b/shortcuts/doc/markdown_fix_test.go @@ -24,6 +24,16 @@ func TestFixBoldSpacing(t *testing.T) { input: "* hello*", want: "*hello*", }, + { + name: "ambiguous italic span stays literal", + input: "2 * x * y", + want: "2 * x * y", + }, + { + name: "ambiguous bold span stays literal", + input: "2 ** x ** y", + want: "2 ** x ** y", + }, { name: "trailing space before closing bold", input: "**hello **", @@ -258,6 +268,11 @@ func TestNormalizeNestedListIndentation(t *testing.T) { input: "1. parent\n2. sibling", want: "1. parent\n2. sibling", }, + { + name: "indented top-level marker without parent list stays unchanged", + input: "paragraph\n\n 1. item", + want: "paragraph\n\n 1. item", + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { From 6b92fb7ad5cc30e562e09f6cebfb03acb214cf95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=AB=98=E6=98=A5=E6=99=96?= <18220699480@163.com> Date: Mon, 20 Apr 2026 20:29:07 +0800 Subject: [PATCH 3/4] =?UTF-8?q?fix(doc):=20=E4=BF=AE=E6=AD=A3=E5=B5=8C?= =?UTF-8?q?=E5=A5=97=E5=88=97=E8=A1=A8=E7=BC=A9=E8=BF=9B=E7=9A=84=E7=A9=BA?= =?UTF-8?q?=E8=A1=8C=E8=AF=AF=E5=88=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 遇到空行时停止向上查找父级列表项,避免把 loose list sibling 误改成嵌套列表 - 避免把列表项中的四空格缩进代码块误改成 tab 缩进列表项 - 补充两个回归测试,并更新 fixBoldSpacing 注释使其与当前实现一致 --- shortcuts/doc/markdown_fix.go | 21 ++++++++++++--------- shortcuts/doc/markdown_fix_test.go | 10 ++++++++++ 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/shortcuts/doc/markdown_fix.go b/shortcuts/doc/markdown_fix.go index 3aca4988f..4196eaff8 100644 --- a/shortcuts/doc/markdown_fix.go +++ b/shortcuts/doc/markdown_fix.go @@ -111,17 +111,20 @@ func fixBlockquoteHardBreaks(md string) string { return strings.Join(out, "\n") } -// fixBoldSpacing fixes two issues with bold markers exported by Lark: +// fixBoldSpacing normalizes emphasis markers exported by Lark while preserving +// inline code spans: // -// 1. Trailing whitespace before closing **: "**text **" → "**text**" -// CommonMark requires no space before a closing delimiter; otherwise the -// ** is rendered as literal text. +// 1. Removes leading whitespace after opening ** and * delimiters: +// "** text**" → "**text**", "* text*" → "*text*" // -// 2. Redundant bold in ATX headings: "# **text**" → "# text" -// Headings are already bold, so the inner ** is visually redundant and -// some renderers display the markers literally. +// 2. Removes trailing whitespace before closing ** and * delimiters: +// "**text **" → "**text**", "*text *" → "*text*" // -// Both fixes skip inline code spans to avoid modifying literal code content. +// 3. Removes redundant bold around an entire ATX heading: +// "# **text**" → "# text" +// +// The bold and italic spacing fixes only run on non-code segments so literal +// code content is left unchanged. var ( boldLeadingSpaceRe = regexp.MustCompile(`(\*\*)\s+([^*\n](?:[^*\n]*[^*\s\n])?)(\*\*)`) boldTrailingSpaceRe = regexp.MustCompile(`(\*\*\S[^*]*?)\s+(\*\*)`) @@ -335,7 +338,7 @@ func hasPreviousNonBlankListItem(lines []string, index int) bool { for i := index - 1; i >= 0; i-- { trimmed := strings.TrimSpace(lines[i]) if trimmed == "" { - continue + return false } return listItemRe.MatchString(lines[i]) } diff --git a/shortcuts/doc/markdown_fix_test.go b/shortcuts/doc/markdown_fix_test.go index 720bd30b5..8aaee60e9 100644 --- a/shortcuts/doc/markdown_fix_test.go +++ b/shortcuts/doc/markdown_fix_test.go @@ -273,6 +273,16 @@ func TestNormalizeNestedListIndentation(t *testing.T) { input: "paragraph\n\n 1. item", want: "paragraph\n\n 1. item", }, + { + name: "blank-line-separated loose-list sibling stays unchanged", + input: "1. a\n\n 1. b", + want: "1. a\n\n 1. b", + }, + { + name: "indented code block inside list item stays unchanged", + input: "- parent\n\n 1. code", + want: "- parent\n\n 1. code", + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { From 2f734f8937590efec4218f371a56a72eb7b84eda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=AB=98=E6=98=A5=E6=99=96?= <18220699480@163.com> Date: Mon, 20 Apr 2026 21:16:44 +0800 Subject: [PATCH 4/4] =?UTF-8?q?fix(doc):=20=E4=BF=AE=E5=A4=8D=20Markdown?= =?UTF-8?q?=20emphasis=20=E7=A9=BA=E6=A0=BC=E5=9B=9E=E5=86=99?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 将 fixBoldSpacingLine 改为按星号 run 扫描,修复 ** hello **、* hello * 和同一行多个 italic span 的空格清理 - 保留 inline code、heading 和 *** hello** 这类近邻字面量,避免误改 emphasis nesting --- shortcuts/doc/markdown_fix.go | 114 +++++++++++++++++++++++------ shortcuts/doc/markdown_fix_test.go | 35 +++++++++ 2 files changed, 127 insertions(+), 22 deletions(-) diff --git a/shortcuts/doc/markdown_fix.go b/shortcuts/doc/markdown_fix.go index 4196eaff8..1ead7a61c 100644 --- a/shortcuts/doc/markdown_fix.go +++ b/shortcuts/doc/markdown_fix.go @@ -6,6 +6,8 @@ package doc import ( "regexp" "strings" + "unicode" + "unicode/utf8" ) // fixExportedMarkdown applies post-processing to Lark-exported Markdown to @@ -126,10 +128,6 @@ func fixBlockquoteHardBreaks(md string) string { // The bold and italic spacing fixes only run on non-code segments so literal // code content is left unchanged. var ( - boldLeadingSpaceRe = regexp.MustCompile(`(\*\*)\s+([^*\n](?:[^*\n]*[^*\s\n])?)(\*\*)`) - boldTrailingSpaceRe = regexp.MustCompile(`(\*\*\S[^*]*?)\s+(\*\*)`) - italicLeadingSpaceRe = regexp.MustCompile(`(^|[^*])(\*)\s+([^*\n](?:[^*\n]*[^*\s\n])?)(\*)([^*]|$)`) - italicTrailingSpaceRe = regexp.MustCompile(`(^|[^*])(\*\S[^*]*?)\s+(\*)([^*]|$)`) // headingBoldRe uses [^*]+ (no asterisks) to avoid mismatching headings // that contain multiple disjoint bold spans such as "# **foo** and **bar**". headingBoldRe = regexp.MustCompile(`(?m)^(#{1,6})\s+\*\*([^*]+)\*\*\s*$`) @@ -192,44 +190,116 @@ func scanInlineCodeSpans(line string) [][2]int { // fixBoldSpacingLine applies bold/italic trailing-space fixes to a single line, // skipping content inside inline code spans to avoid corrupting literal code. // ATX heading lines are also skipped here because headingBoldRe in fixBoldSpacing -// handles them separately and boldTrailingSpaceRe can misfire on headings with -// multiple disjoint bold spans (e.g. "# **foo** and **bar**"). +// handles them separately, keeping heading-only normalization isolated from the +// inline emphasis spacing scanner below. func fixBoldSpacingLine(line string) string { if atxHeadingRe.MatchString(line) { return line } spans := scanInlineCodeSpans(line) if len(spans) == 0 { - line = boldLeadingSpaceRe.ReplaceAllString(line, "$1$2$3") - line = boldTrailingSpaceRe.ReplaceAllString(line, "$1$2") - line = italicLeadingSpaceRe.ReplaceAllString(line, "$1$2$3$4$5") - line = italicTrailingSpaceRe.ReplaceAllString(line, "$1$2$3$4") - return line + return fixEmphasisSpacingSegment(line) } var sb strings.Builder pos := 0 for _, loc := range spans { // Process the non-code segment before this inline code span. seg := line[pos:loc[0]] - seg = boldLeadingSpaceRe.ReplaceAllString(seg, "$1$2$3") - seg = boldTrailingSpaceRe.ReplaceAllString(seg, "$1$2") - seg = italicLeadingSpaceRe.ReplaceAllString(seg, "$1$2$3$4$5") - seg = italicTrailingSpaceRe.ReplaceAllString(seg, "$1$2$3$4") - sb.WriteString(seg) + sb.WriteString(fixEmphasisSpacingSegment(seg)) // Preserve inline code span as-is. sb.WriteString(line[loc[0]:loc[1]]) pos = loc[1] } // Remaining non-code segment after the last code span. - seg := line[pos:] - seg = boldLeadingSpaceRe.ReplaceAllString(seg, "$1$2$3") - seg = boldTrailingSpaceRe.ReplaceAllString(seg, "$1$2") - seg = italicLeadingSpaceRe.ReplaceAllString(seg, "$1$2$3$4$5") - seg = italicTrailingSpaceRe.ReplaceAllString(seg, "$1$2$3$4") - sb.WriteString(seg) + sb.WriteString(fixEmphasisSpacingSegment(line[pos:])) return sb.String() } +// fixEmphasisSpacingSegment trims only the whitespace immediately inside simple +// *...* and **...** spans. It deliberately ignores runs of 3+ asterisks and +// any candidate whose payload contains another asterisk so nested emphasis-like +// text remains untouched. When both inner sides contain whitespace, single-rune +// payloads are preserved as literal text (for example "* x *" and "** x **"). +func fixEmphasisSpacingSegment(seg string) string { + if !strings.Contains(seg, "*") { + return seg + } + + var sb strings.Builder + pos := 0 + for pos < len(seg) { + openStart, openEnd, ok := nextAsteriskRun(seg, pos) + if !ok { + sb.WriteString(seg[pos:]) + break + } + + sb.WriteString(seg[pos:openStart]) + + markerLen := openEnd - openStart + if markerLen != 1 && markerLen != 2 { + sb.WriteString(seg[openStart:openEnd]) + pos = openEnd + continue + } + + closeStart, closeEnd, ok := nextAsteriskRun(seg, openEnd) + if !ok || closeEnd-closeStart != markerLen { + sb.WriteString(seg[openStart:openEnd]) + pos = openEnd + continue + } + + payload := seg[openEnd:closeStart] + normalized, shouldNormalize := normalizeEmphasisPayload(payload) + if !shouldNormalize { + sb.WriteString(seg[openStart:closeEnd]) + pos = closeEnd + continue + } + + marker := seg[openStart:openEnd] + sb.WriteString(marker) + sb.WriteString(normalized) + sb.WriteString(marker) + pos = closeEnd + } + return sb.String() +} + +func nextAsteriskRun(s string, start int) (runStart, runEnd int, ok bool) { + for i := start; i < len(s); i++ { + if s[i] != '*' { + continue + } + j := i + for j < len(s) && s[j] == '*' { + j++ + } + return i, j, true + } + return 0, 0, false +} + +func normalizeEmphasisPayload(payload string) (string, bool) { + trimmedLeft := strings.TrimLeftFunc(payload, unicode.IsSpace) + trimmed := strings.TrimRightFunc(trimmedLeft, unicode.IsSpace) + if trimmed == "" { + return payload, false + } + + hasLeadingSpace := len(trimmedLeft) != len(payload) + hasTrailingSpace := len(trimmed) != len(trimmedLeft) + if !hasLeadingSpace && !hasTrailingSpace { + return payload, true + } + + if hasLeadingSpace && hasTrailingSpace && utf8.RuneCountInString(trimmed) == 1 { + return payload, false + } + return trimmed, true +} + var setextRe = regexp.MustCompile(`(?m)^([^\n]+)\n(-{3,}\s*$)`) func fixSetextAmbiguity(md string) string { diff --git a/shortcuts/doc/markdown_fix_test.go b/shortcuts/doc/markdown_fix_test.go index 8aaee60e9..81ac26a9a 100644 --- a/shortcuts/doc/markdown_fix_test.go +++ b/shortcuts/doc/markdown_fix_test.go @@ -24,6 +24,21 @@ func TestFixBoldSpacing(t *testing.T) { input: "* hello*", want: "*hello*", }, + { + name: "leading and trailing spaces inside bold are collapsed", + input: "** hello **", + want: "**hello**", + }, + { + name: "leading and trailing spaces inside italic are collapsed", + input: "* hello *", + want: "*hello*", + }, + { + name: "multiple spaced italic spans on one line are each collapsed", + input: "* a* * b*", + want: "*a* *b*", + }, { name: "ambiguous italic span stays literal", input: "2 * x * y", @@ -34,6 +49,21 @@ func TestFixBoldSpacing(t *testing.T) { input: "2 ** x ** y", want: "2 ** x ** y", }, + { + name: "single-rune italic with spaces on both sides stays literal", + input: "* x *", + want: "* x *", + }, + { + name: "single-rune bold with spaces on both sides stays literal", + input: "** x **", + want: "** x **", + }, + { + name: "triple-asterisk near miss stays literal", + input: "*** hello**", + want: "*** hello**", + }, { name: "trailing space before closing bold", input: "**hello **", @@ -74,6 +104,11 @@ func TestFixBoldSpacing(t *testing.T) { input: "**foo ** and `**bar **`", want: "**foo** and `**bar **`", }, + { + name: "inline code with spaced italic stays literal while outside span is fixed", + input: "`* hello *` and * hello *", + want: "`* hello *` and *hello*", + }, { name: "opening space inside text tag fixed", input: `** Helpful - 有用性:**`,