-
Notifications
You must be signed in to change notification settings - Fork 583
Fix/im markdown emphasis spacing #667
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
e0aed5c
7197b5e
efc50f7
78d037a
86a9c67
9aff324
dbf2613
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,6 +18,8 @@ import ( | |
| "regexp" | ||
| "strconv" | ||
| "strings" | ||
| "unicode" | ||
| "unicode/utf8" | ||
|
|
||
| "github.com/larksuite/cli/extension/fileio" | ||
| "github.com/larksuite/cli/internal/output" | ||
|
|
@@ -788,8 +790,9 @@ var ( | |
| ) | ||
|
|
||
| const ( | ||
| markdownCodeBlockPlaceholder = "___CB_" | ||
| postBlankLinePlaceholder = "\u200B" | ||
| markdownCodeBlockPlaceholder = "___CB_" | ||
| markdownInlineCodePlaceholder = "___IC_" | ||
| postBlankLinePlaceholder = "\u200B" | ||
| ) | ||
|
|
||
| type markdownPart struct { | ||
|
|
@@ -816,8 +819,40 @@ func restoreMarkdownCodeBlocks(text string, codeBlocks []string) string { | |
| return restored | ||
| } | ||
|
|
||
| func protectMarkdownInlineCode(text string) (string, []string) { | ||
| var inlineCodes []string | ||
| lines := strings.Split(text, "\n") | ||
| for i, line := range lines { | ||
| spans := scanInlineCodeSpans(line) | ||
| if len(spans) == 0 { | ||
| continue | ||
| } | ||
| var sb strings.Builder | ||
| pos := 0 | ||
| for _, span := range spans { | ||
| sb.WriteString(line[pos:span[0]]) | ||
| idx := len(inlineCodes) | ||
| inlineCodes = append(inlineCodes, line[span[0]:span[1]]) | ||
| sb.WriteString(fmt.Sprintf("%s%d___", markdownInlineCodePlaceholder, idx)) | ||
| pos = span[1] | ||
| } | ||
| sb.WriteString(line[pos:]) | ||
| lines[i] = sb.String() | ||
| } | ||
| return strings.Join(lines, "\n"), inlineCodes | ||
| } | ||
|
|
||
| func restoreMarkdownInlineCode(text string, inlineCodes []string) string { | ||
| restored := text | ||
| for i, code := range inlineCodes { | ||
| restored = strings.Replace(restored, fmt.Sprintf("%s%d___", markdownInlineCodePlaceholder, i), code, 1) | ||
| } | ||
| return restored | ||
| } | ||
|
|
||
| func optimizeMarkdownStyle(text string) string { | ||
| r, codeBlocks := protectMarkdownCodeBlocks(text) | ||
| r = normalizeMarkdownEmphasisSpacing(r) | ||
|
|
||
| // Only downgrade when original text has H1~H3; order matters (H2~H6 first). | ||
| if reHasH1toH3.MatchString(text) { | ||
|
|
@@ -849,6 +884,165 @@ func optimizeMarkdownStyle(text string) string { | |
| return r | ||
| } | ||
|
|
||
| // normalizeMarkdownEmphasisSpacing trims whitespace immediately inside simple | ||
| // *...*, **...**, and ***...*** spans while preserving fenced code blocks and | ||
| // inline code. This hardens AI-generated markdown such as "** bold **" into | ||
| // "**bold**" so Feishu's md renderer can recognize emphasis instead of | ||
| // leaking literal '*'. | ||
| func normalizeMarkdownEmphasisSpacing(markdown string) string { | ||
| protected, codeBlocks := protectMarkdownCodeBlocks(markdown) | ||
| protected, inlineCodes := protectMarkdownInlineCode(protected) | ||
|
|
||
| lines := strings.Split(protected, "\n") | ||
| for i, line := range lines { | ||
| lines[i] = normalizeEmphasisSpacingSegment(line) | ||
| } | ||
|
|
||
| normalized := strings.Join(lines, "\n") | ||
| normalized = restoreMarkdownInlineCode(normalized, inlineCodes) | ||
| return restoreMarkdownCodeBlocks(normalized, codeBlocks) | ||
| } | ||
|
|
||
// scanInlineCodeSpans locates inline code spans in a single line and returns
// their byte ranges as [start, end) pairs, delimiters included. A span opens
// with a run of backticks and closes at the next run of exactly the same
// length; shorter or longer runs in between are treated as content. An opening
// run with no matching closer yields no span, and scanning resumes right after
// that run so later backticks on the line can still pair up.
func scanInlineCodeSpans(line string) [][2]int {
	// backtickRunEnd returns the index just past the backtick run at from.
	backtickRunEnd := func(from int) int {
		end := from
		for end < len(line) && line[end] == '`' {
			end++
		}
		return end
	}

	var spans [][2]int
	for cursor := 0; cursor < len(line); {
		if line[cursor] != '`' {
			cursor++
			continue
		}

		open := cursor
		cursor = backtickRunEnd(open)
		width := cursor - open

		// A closer needs at least width bytes, so stop probing once fewer remain.
		for probe := cursor; probe <= len(line)-width; {
			if line[probe] != '`' {
				probe++
				continue
			}
			closeEnd := backtickRunEnd(probe)
			if closeEnd-probe == width {
				spans = append(spans, [2]int{open, closeEnd})
				cursor = closeEnd
				break
			}
			probe = closeEnd
		}
	}
	return spans
}
|
|
||
| func normalizeEmphasisSpacingSegment(seg string) string { | ||
| if !strings.Contains(seg, "*") { | ||
| return seg | ||
| } | ||
|
|
||
| var sb strings.Builder | ||
| pos := 0 | ||
| for pos < len(seg) { | ||
| openStart, openEnd, ok := nextAsteriskRun(seg, pos) | ||
| if !ok { | ||
| sb.WriteString(seg[pos:]) | ||
| break | ||
| } | ||
|
|
||
| sb.WriteString(seg[pos:openStart]) | ||
|
|
||
| markerLen := openEnd - openStart | ||
| if markerLen != 1 && markerLen != 2 && markerLen != 3 { | ||
| sb.WriteString(seg[openStart:openEnd]) | ||
| pos = openEnd | ||
| continue | ||
| } | ||
|
|
||
| closeStart, closeEnd, ok := nextAsteriskRun(seg, openEnd) | ||
| if !ok || closeEnd-closeStart != markerLen { | ||
| sb.WriteString(seg[openStart:openEnd]) | ||
| pos = openEnd | ||
| continue | ||
| } | ||
| if !hasSimpleEmphasisBoundaries(seg, openStart, closeEnd) { | ||
| sb.WriteString(seg[openStart:closeEnd]) | ||
| pos = closeEnd | ||
| continue | ||
| } | ||
|
|
||
| payload := seg[openEnd:closeStart] | ||
| normalized, shouldNormalize := normalizeEmphasisPayload(payload) | ||
| if !shouldNormalize { | ||
| sb.WriteString(seg[openStart:closeEnd]) | ||
| pos = closeEnd | ||
| continue | ||
| } | ||
|
|
||
| marker := seg[openStart:openEnd] | ||
| sb.WriteString(marker) | ||
| sb.WriteString(normalized) | ||
| sb.WriteString(marker) | ||
| pos = closeEnd | ||
| } | ||
| return sb.String() | ||
| } | ||
|
Comment on lines
+942
to
+992
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Edge case: list items containing a stray `*` are rewritten as emphasis.
Examples (single line each):
- `* item one * extra` becomes `*item one* extra`
- `* note: a * b` becomes `*note: a* b`
Bold/triple-asterisk variants inside such list items still work fine because the marker-length check filters them out — the issue is specific to lines that begin with `* ` (a single-asterisk list bullet). A simple guard would be to skip normalization when the opening marker is a single `*` at the start of a line followed by whitespace.
🛡️ Sketch of guard (add to `hasSimpleEmphasisBoundaries` or its caller):
// Reject list-item-style openers: line-start `*` followed by whitespace.
isLineStart := openStart == 0 || s[openStart-1] == '\n'
if isLineStart && openEnd-openStart == 1 && openEnd < len(s) {
if r, _ := utf8.DecodeRuneInString(s[openEnd:]); unicode.IsSpace(r) {
return false
}
}🤖 Prompt for AI Agents |
||
|
|
||
// nextAsteriskRun finds the first run of consecutive '*' bytes in s at or
// after byte offset start. It returns the run as the half-open range
// [runStart, runEnd) with ok set to true, or (0, 0, false) when no asterisk
// remains.
func nextAsteriskRun(s string, start int) (runStart, runEnd int, ok bool) {
	begin := start
	for begin < len(s) && s[begin] != '*' {
		begin++
	}
	if begin >= len(s) {
		return 0, 0, false
	}

	end := begin + 1
	for end < len(s) && s[end] == '*' {
		end++
	}
	return begin, end, true
}
|
|
||
| func hasSimpleEmphasisBoundaries(s string, openStart, closeEnd int) bool { | ||
| if openStart > 0 { | ||
| prev, _ := utf8.DecodeLastRuneInString(s[:openStart]) | ||
| if isWordLikeRune(prev) { | ||
| return false | ||
| } | ||
| } | ||
| if closeEnd < len(s) { | ||
| next, _ := utf8.DecodeRuneInString(s[closeEnd:]) | ||
| if isWordLikeRune(next) { | ||
| return false | ||
| } | ||
| } | ||
| return true | ||
| } | ||
|
|
||
// isWordLikeRune reports whether r is a Unicode letter, a Unicode decimal
// digit, or any code point of the Han script.
func isWordLikeRune(r rune) bool {
	switch {
	case unicode.IsLetter(r), unicode.IsDigit(r):
		return true
	default:
		return unicode.Is(unicode.Han, r)
	}
}
|
|
||
// normalizeEmphasisPayload strips Unicode whitespace from both ends of the text
// found between an emphasis opener and closer. It returns the trimmed text and
// true only when trimming actually removed something and left a non-empty
// result that contains no '*'. In every other case the payload is returned
// untouched together with false.
func normalizeEmphasisPayload(payload string) (string, bool) {
	core := strings.TrimSpace(payload)
	if core == "" || len(core) == len(payload) || strings.Contains(core, "*") {
		return payload, false
	}
	return core, true
}
|
|
||
| func shouldUseSegmentedPost(markdown string) bool { | ||
| protected, _ := protectMarkdownCodeBlocks(markdown) | ||
| return reBlankLineSeparator.MatchString(protected) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
🏁 Script executed:
Repository: larksuite/cli
Length of output: 1352
🏁 Script executed:
fd -e _test.go shortcuts/im/ | head -20Repository: larksuite/cli
Length of output: 415
🏁 Script executed:
rg -nP --type=go 'scanInlineCodeSpans|protectMarkdownInlineCode|unmatched.*backtick|backtick.*unmatched' shortcuts/im/ -A 5 -B 5Repository: larksuite/cli
Length of output: 2493
🏁 Script executed:
fd --full-path '.*helpers.*test.*\.go' shortcuts/im/Repository: larksuite/cli
Length of output: 205
🏁 Script executed:
Repository: larksuite/cli
Length of output: 39
🏁 Script executed:
cat -n shortcuts/im/helpers_test.go | head -150Repository: larksuite/cli
Length of output: 6334
🏁 Script executed:
rg -n 'protectMarkdownInlineCode|scanInlineCodeSpans' shortcuts/im/helpers_test.go -A 20Repository: larksuite/cli
Length of output: 39
🏁 Script executed:
Repository: larksuite/cli
Length of output: 90
🏁 Script executed:
Repository: larksuite/cli
Length of output: 1671
🏁 Script executed:
rg -n 'normalizeMarkdownEmphasisSpacing|protectMarkdownInlineCode|scanInlineCodeSpans|unmatched.*backtick' shortcuts/im/helpers_test.go -A 10 -B 2Repository: larksuite/cli
Length of output: 39
🏁 Script executed:
sed -n '319,375p' shortcuts/im/helpers_test.goRepository: larksuite/cli
Length of output: 1475
🏁 Script executed:
Repository: larksuite/cli
Length of output: 170
🏁 Script executed:
sed -n '906,940p' shortcuts/im/helpers.goRepository: larksuite/cli
Length of output: 738
🏁 Script executed:
rg -n 'backtick|inline' shortcuts/im/helpers_test.goRepository: larksuite/cli
Length of output: 210
🏁 Script executed:
grep -n 'Test.*[Mm]arkdown\|Test.*[Ee]mphasis\|Test.*[Cc]ode' shortcuts/im/helpers_test.goRepository: larksuite/cli
Length of output: 575
🏁 Script executed:
sed -n '538,565p' shortcuts/im/helpers_test.goRepository: larksuite/cli
Length of output: 1180
🏁 Script executed:
sed -n '506,560p' shortcuts/im/helpers_test.goRepository: larksuite/cli
Length of output: 2282
🏁 Script executed:
rg -n 'unmatched' shortcuts/im/helpers_test.goRepository: larksuite/cli
Length of output: 39
🏁 Script executed:
sed -n '822,850p' shortcuts/im/helpers.goRepository: larksuite/cli
Length of output: 942
🏁 Script executed:
Repository: larksuite/cli
Length of output: 812
🏁 Script executed:
Repository: larksuite/cli
Length of output: 304
Add a test case for unmatched backticks in inline code scanning.
The scan logic correctly handles unmatched opening backticks: when no closing run is found, `i` remains at the post-run position, allowing the outer loop to continue and subsequent backticks on the same line to form spans independently. This fallback behavior matches CommonMark's intent of not requiring the first backtick run to be the code opener.
However, the existing test suite for `wrapMarkdownAsPost` covers only balanced delimiters (single-backtick `code` and double-backtick ``nested``). Add a test case with an unmatched backtick run followed by a balanced pair (e.g., the line: text ``middle `code` end) to document this behavior and ensure the emphasis normalization skips over it correctly.
🤖 Prompt for AI Agents