diff --git a/shortcuts/im/helpers.go b/shortcuts/im/helpers.go index 9086b0688..a4c2830d2 100644 --- a/shortcuts/im/helpers.go +++ b/shortcuts/im/helpers.go @@ -18,6 +18,8 @@ import ( "regexp" "strconv" "strings" + "unicode" + "unicode/utf8" "github.com/larksuite/cli/extension/fileio" "github.com/larksuite/cli/internal/output" @@ -788,8 +790,9 @@ var ( ) const ( - markdownCodeBlockPlaceholder = "___CB_" - postBlankLinePlaceholder = "\u200B" + markdownCodeBlockPlaceholder = "___CB_" + markdownInlineCodePlaceholder = "___IC_" + postBlankLinePlaceholder = "\u200B" ) type markdownPart struct { @@ -816,8 +819,40 @@ func restoreMarkdownCodeBlocks(text string, codeBlocks []string) string { return restored } +func protectMarkdownInlineCode(text string) (string, []string) { + var inlineCodes []string + lines := strings.Split(text, "\n") + for i, line := range lines { + spans := scanInlineCodeSpans(line) + if len(spans) == 0 { + continue + } + var sb strings.Builder + pos := 0 + for _, span := range spans { + sb.WriteString(line[pos:span[0]]) + idx := len(inlineCodes) + inlineCodes = append(inlineCodes, line[span[0]:span[1]]) + sb.WriteString(fmt.Sprintf("%s%d___", markdownInlineCodePlaceholder, idx)) + pos = span[1] + } + sb.WriteString(line[pos:]) + lines[i] = sb.String() + } + return strings.Join(lines, "\n"), inlineCodes +} + +func restoreMarkdownInlineCode(text string, inlineCodes []string) string { + restored := text + for i, code := range inlineCodes { + restored = strings.Replace(restored, fmt.Sprintf("%s%d___", markdownInlineCodePlaceholder, i), code, 1) + } + return restored +} + func optimizeMarkdownStyle(text string) string { r, codeBlocks := protectMarkdownCodeBlocks(text) + r = normalizeMarkdownEmphasisSpacing(r) // Only downgrade when original text has H1~H3; order matters (H2~H6 first). 
if reHasH1toH3.MatchString(text) { @@ -849,6 +884,165 @@ func optimizeMarkdownStyle(text string) string { return r } +// normalizeMarkdownEmphasisSpacing trims whitespace immediately inside simple +// *...*, **...**, and ***...*** spans while preserving fenced code blocks and +// inline code. This hardens AI-generated markdown such as "** bold **" into +// "**bold**" so Feishu's md renderer can recognize emphasis instead of +// leaking literal '*'. +func normalizeMarkdownEmphasisSpacing(markdown string) string { + protected, codeBlocks := protectMarkdownCodeBlocks(markdown) + protected, inlineCodes := protectMarkdownInlineCode(protected) + + lines := strings.Split(protected, "\n") + for i, line := range lines { + lines[i] = normalizeEmphasisSpacingSegment(line) + } + + normalized := strings.Join(lines, "\n") + normalized = restoreMarkdownInlineCode(normalized, inlineCodes) + return restoreMarkdownCodeBlocks(normalized, codeBlocks) +} + +// scanInlineCodeSpans returns byte ranges [start, end) for inline code spans +// using matching backtick runs, so emphasis normalization skips literal code. 
// scanInlineCodeSpans reports the byte ranges [start, end) of inline code
// spans in line. A span opens with a run of backticks and closes at the next
// run of exactly the same length; an opening run without such a partner is
// not reported and scanning resumes right after it.
func scanInlineCodeSpans(line string) [][2]int {
	// runEnd returns the index just past the backtick run starting at from.
	runEnd := func(from int) int {
		for from < len(line) && line[from] == '`' {
			from++
		}
		return from
	}

	var found [][2]int
	for cursor := 0; cursor < len(line); {
		if line[cursor] != '`' {
			cursor++
			continue
		}
		open := cursor
		cursor = runEnd(open)
		width := cursor - open

		// A closing run needs width bytes, so stop probing once fewer remain.
		for probe := cursor; probe <= len(line)-width; {
			if line[probe] != '`' {
				probe++
				continue
			}
			end := runEnd(probe)
			if end-probe == width {
				found = append(found, [2]int{open, end})
				cursor = end
				break
			}
			probe = end
		}
	}
	return found
}

// normalizeEmphasisSpacingSegment trims whitespace just inside emphasis
// markers within a single segment of text.
//
// Each run of one to three asterisks is treated as a candidate opener and is
// paired only with the very next asterisk run, and only if that run has the
// same length. A pair is rewritten when hasSimpleEmphasisBoundaries accepts it
// and normalizeEmphasisPayload reports that trimming changes the payload;
// otherwise the text is copied through verbatim.
func normalizeEmphasisSpacingSegment(seg string) string {
	if strings.IndexByte(seg, '*') < 0 {
		return seg
	}

	var out strings.Builder
	cursor := 0
	for cursor < len(seg) {
		openStart, openEnd, found := nextAsteriskRun(seg, cursor)
		if !found {
			out.WriteString(seg[cursor:])
			break
		}
		out.WriteString(seg[cursor:openStart])
		marker := seg[openStart:openEnd]

		closeStart, closeEnd, paired := 0, 0, false
		if len(marker) <= 3 {
			closeStart, closeEnd, paired = nextAsteriskRun(seg, openEnd)
			paired = paired && closeEnd-closeStart == len(marker)
		}
		if !paired {
			// Emit only the opener; the following run may open a pair itself.
			out.WriteString(marker)
			cursor = openEnd
			continue
		}

		// From here on the opener and closer are consumed together.
		cursor = closeEnd
		if hasSimpleEmphasisBoundaries(seg, openStart, closeEnd) {
			if inner, changed := normalizeEmphasisPayload(seg[openEnd:closeStart]); changed {
				out.WriteString(marker)
				out.WriteString(inner)
				out.WriteString(marker)
				continue
			}
		}
		out.WriteString(seg[openStart:closeEnd])
	}
	return out.String()
}

// nextAsteriskRun locates the first run of consecutive '*' bytes in s at or
// after start. It returns the run as [runStart, runEnd) with ok set to true,
// or 0, 0, false when no asterisk is found.
func nextAsteriskRun(s string, start int) (runStart, runEnd int, ok bool) {
	if start >= len(s) {
		return 0, 0, false
	}
	rel := strings.IndexByte(s[start:], '*')
	if rel < 0 {
		return 0, 0, false
	}
	first := start + rel
	last := first + 1
	for last < len(s) && s[last] == '*' {
		last++
	}
	return first, last, true
}

// hasSimpleEmphasisBoundaries reports whether the span s[openStart:closeEnd]
// is free of word-like neighbours: the rune right before openStart and the
// rune right after closeEnd must both fail isWordLikeRune. A span touching the
// start or end of s has no neighbour on that side, which counts as acceptable.
func hasSimpleEmphasisBoundaries(s string, openStart, closeEnd int) bool {
	if openStart > 0 {
		if before, _ := utf8.DecodeLastRuneInString(s[:openStart]); isWordLikeRune(before) {
			return false
		}
	}
	if closeEnd >= len(s) {
		return true
	}
	after, _ := utf8.DecodeRuneInString(s[closeEnd:])
	return !isWordLikeRune(after)
}

// isWordLikeRune reports whether r is a Unicode letter, a Unicode decimal
// digit, or a member of the Han script table.
func isWordLikeRune(r rune) bool {
	switch {
	case unicode.IsLetter(r), unicode.IsDigit(r):
		return true
	default:
		return unicode.Is(unicode.Han, r)
	}
}

// normalizeEmphasisPayload strips leading and trailing Unicode whitespace from
// the text found between two emphasis markers.
//
// It returns the trimmed text and true when trimming removed something. It
// returns the original payload and false when the payload is empty or all
// whitespace, when there is nothing to trim, or when the trimmed text itself
// contains an asterisk.
func normalizeEmphasisPayload(payload string) (string, bool) {
	core := strings.TrimFunc(payload, unicode.IsSpace)
	switch {
	case core == "",
		len(core) == len(payload),
		strings.ContainsRune(core, '*'):
		return payload, false
	}
	return core, true
}
fenced code spacing", func(t *testing.T) { + input := "code `** keep **` and prose ** fix **\n```md\n** keep fenced **\n```" + got := wrapMarkdownAsPost(input) + node := decodePostParagraphForTest(t, got, 0) + text, _ := node["text"].(string) + if text != "code `** keep **` and prose **fix**\n```md\n** keep fenced **\n```" { + t.Fatalf("wrapMarkdownAsPost() text = %#v", node["text"]) + } + }) + + t.Run("preserves multiple inline code spans", func(t *testing.T) { + input := "`** keep **` prose ** fix ** ``* keep *`` and * okay *" + got := wrapMarkdownAsPost(input) + node := decodePostParagraphForTest(t, got, 0) + text, _ := node["text"].(string) + if text != "`** keep **` prose **fix** ``* keep *`` and *okay*" { + t.Fatalf("wrapMarkdownAsPost() text = %#v", node["text"]) + } + }) + + t.Run("preserves embedded non emphatic spaced asterisks", func(t *testing.T) { + got := wrapMarkdownAsPost("hello** world **there") + node := decodePostParagraphForTest(t, got, 0) + if node["text"] != "hello** world **there" { + t.Fatalf("wrapMarkdownAsPost() text = %#v", node["text"]) + } + }) + t.Run("bare URL becomes a tag", func(t *testing.T) { got := wrapMarkdownAsPost("see https://example.com/flow_id=abc_def done") if !strings.Contains(got, `"tag":"a"`) { diff --git a/skills/lark-im/references/lark-im-messages-reply.md b/skills/lark-im/references/lark-im-messages-reply.md index cc73a5993..45f15a72b 100644 --- a/skills/lark-im/references/lark-im-messages-reply.md +++ b/skills/lark-im/references/lark-im-messages-reply.md @@ -64,6 +64,7 @@ So `--markdown` is a convenience mode, not a full Markdown compatibility layer. - Block spacing and line breaks may be normalized during conversion. - Code blocks are preserved as code blocks. - Excess blank lines are compressed. +- Emphasis with `*`, `**`, or `***` should not contain inner edge spaces. Use `**bold**` instead of `** bold **`, `** bold**`, or `**bold **`; use `*italic*` instead of `* italic *`, `* italic*`, or `*italic *`. 
- Only remote `http://...`, `https://...`, or already-uploaded `img_xxx` Markdown images are kept reliably. - Local paths in Markdown image syntax like `![x](./a.png)` are **not** auto-uploaded by `--markdown`. - If remote Markdown image handling fails, that image is removed with a warning. diff --git a/skills/lark-im/references/lark-im-messages-send.md b/skills/lark-im/references/lark-im-messages-send.md index a328063a3..92ed2b66b 100644 --- a/skills/lark-im/references/lark-im-messages-send.md +++ b/skills/lark-im/references/lark-im-messages-send.md @@ -64,6 +64,7 @@ This means `--markdown` is convenient, but it is not a full-fidelity Markdown tr - Block spacing and line breaks may be normalized during conversion. - Code blocks are preserved as code blocks. - Excess blank lines are compressed. +- Emphasis with `*`, `**`, or `***` should not contain inner edge spaces. Use `**bold**` instead of `** bold **`, `** bold**`, or `**bold **`; use `*italic*` instead of `* italic *`, `* italic*`, or `*italic *`. - Only `http://...`, `https://...`, or already-uploaded `img_xxx` Markdown images are kept reliably. - Local paths in Markdown image syntax like `![x](./a.png)` are **not** auto-uploaded by `--markdown`; they may be stripped during optimization. - If remote Markdown image download/upload fails, that image is removed with a warning.