Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
198 changes: 196 additions & 2 deletions shortcuts/im/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ import (
"regexp"
"strconv"
"strings"
"unicode"
"unicode/utf8"

"github.com/larksuite/cli/extension/fileio"
"github.com/larksuite/cli/internal/output"
Expand Down Expand Up @@ -788,8 +790,9 @@ var (
)

const (
markdownCodeBlockPlaceholder = "___CB_"
postBlankLinePlaceholder = "\u200B"
markdownCodeBlockPlaceholder = "___CB_"
markdownInlineCodePlaceholder = "___IC_"
postBlankLinePlaceholder = "\u200B"
)

type markdownPart struct {
Expand All @@ -816,8 +819,40 @@ func restoreMarkdownCodeBlocks(text string, codeBlocks []string) string {
return restored
}

// protectMarkdownInlineCode replaces every inline code span (matched backtick
// runs, as found by scanInlineCodeSpans) with an indexed placeholder so later
// emphasis normalization cannot rewrite literal code. It returns the rewritten
// text together with the extracted spans, in order, for
// restoreMarkdownInlineCode to splice back.
func protectMarkdownInlineCode(text string) (string, []string) {
	var saved []string
	lines := strings.Split(text, "\n")
	for idx, line := range lines {
		ranges := scanInlineCodeSpans(line)
		if len(ranges) == 0 {
			continue
		}
		var out strings.Builder
		cursor := 0
		for _, r := range ranges {
			// Copy the text before the span, then stand in a placeholder
			// carrying this span's index.
			out.WriteString(line[cursor:r[0]])
			out.WriteString(fmt.Sprintf("%s%d___", markdownInlineCodePlaceholder, len(saved)))
			saved = append(saved, line[r[0]:r[1]])
			cursor = r[1]
		}
		out.WriteString(line[cursor:])
		lines[idx] = out.String()
	}
	return strings.Join(lines, "\n"), saved
}

// restoreMarkdownInlineCode substitutes each inline-code placeholder produced
// by protectMarkdownInlineCode with its original span, one occurrence per
// index. The trailing "___" in the placeholder keeps index 1 from matching
// inside index 10, so a simple single replacement per index is safe.
func restoreMarkdownInlineCode(text string, inlineCodes []string) string {
	out := text
	for idx := range inlineCodes {
		token := fmt.Sprintf("%s%d___", markdownInlineCodePlaceholder, idx)
		out = strings.Replace(out, token, inlineCodes[idx], 1)
	}
	return out
}

func optimizeMarkdownStyle(text string) string {
r, codeBlocks := protectMarkdownCodeBlocks(text)
r = normalizeMarkdownEmphasisSpacing(r)

// Only downgrade when original text has H1~H3; order matters (H2~H6 first).
if reHasH1toH3.MatchString(text) {
Expand Down Expand Up @@ -849,6 +884,165 @@ func optimizeMarkdownStyle(text string) string {
return r
}

// normalizeMarkdownEmphasisSpacing trims whitespace immediately inside simple
// *...*, **...**, and ***...*** spans while preserving fenced code blocks and
// inline code. This hardens AI-generated markdown such as "** bold **" into
// "**bold**" so Feishu's md renderer can recognize emphasis instead of
// leaking literal '*'.
// normalizeMarkdownEmphasisSpacing trims whitespace just inside simple *...*,
// **...**, and ***...*** spans (e.g. "** bold **" -> "**bold**") so Feishu's
// markdown renderer recognizes emphasis instead of leaking literal '*'.
// Fenced code blocks and inline code are shielded with placeholders first and
// restored afterwards so literal code is never rewritten.
func normalizeMarkdownEmphasisSpacing(markdown string) string {
	shielded, fences := protectMarkdownCodeBlocks(markdown)
	shielded, snippets := protectMarkdownInlineCode(shielded)

	// Emphasis spans never cross line boundaries, so normalize per line.
	rawLines := strings.Split(shielded, "\n")
	fixed := make([]string, 0, len(rawLines))
	for _, line := range rawLines {
		fixed = append(fixed, normalizeEmphasisSpacingSegment(line))
	}

	result := restoreMarkdownInlineCode(strings.Join(fixed, "\n"), snippets)
	return restoreMarkdownCodeBlocks(result, fences)
}

// scanInlineCodeSpans returns byte ranges [start, end) for inline code spans
// using matching backtick runs, so emphasis normalization skips literal code.
// scanInlineCodeSpans reports the byte ranges [start, end) of inline code
// spans in line. A span opens with a run of N backticks and closes at the
// next run of exactly N backticks (CommonMark code-span matching). An
// opening run with no equal-length closer yields no span; scanning resumes
// right after that run, so later balanced runs still pair up.
func scanInlineCodeSpans(line string) [][2]int {
	var spans [][2]int
	for pos := 0; pos < len(line); {
		if line[pos] != '`' {
			pos++
			continue
		}
		// Measure the opening backtick run.
		open := pos
		for pos < len(line) && line[pos] == '`' {
			pos++
		}
		width := pos - open

		// Hunt for a closing run of exactly `width` backticks; runs of a
		// different length are skipped, not treated as closers.
		for cursor := pos; cursor < len(line); {
			if line[cursor] != '`' {
				cursor++
				continue
			}
			runStart := cursor
			for cursor < len(line) && line[cursor] == '`' {
				cursor++
			}
			if cursor-runStart == width {
				spans = append(spans, [2]int{open, cursor})
				pos = cursor
				break
			}
		}
	}
	return spans
}
Comment on lines +908 to +940
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
rg -nP --type=go -C2 'scanInlineCodeSpans|protectMarkdownInlineCode' shortcuts/im/

Repository: larksuite/cli

Length of output: 1352


🏁 Script executed:

fd -e _test.go shortcuts/im/ | head -20

Repository: larksuite/cli

Length of output: 415


🏁 Script executed:

rg -nP --type=go 'scanInlineCodeSpans|protectMarkdownInlineCode|unmatched.*backtick|backtick.*unmatched' shortcuts/im/ -A 5 -B 5

Repository: larksuite/cli

Length of output: 2493


🏁 Script executed:

fd --full-path '.*helpers.*test.*\.go' shortcuts/im/

Repository: larksuite/cli

Length of output: 205


🏁 Script executed:

rg -nP --type=go 'func Test.*' shortcuts/im/ | grep -i 'inline\|backtick\|code'

Repository: larksuite/cli

Length of output: 39


🏁 Script executed:

cat -n shortcuts/im/helpers_test.go | head -150

Repository: larksuite/cli

Length of output: 6334


🏁 Script executed:

rg -n 'protectMarkdownInlineCode|scanInlineCodeSpans' shortcuts/im/helpers_test.go -A 20

Repository: larksuite/cli

Length of output: 39


🏁 Script executed:

wc -l shortcuts/im/helpers_test.go

Repository: larksuite/cli

Length of output: 90


🏁 Script executed:

rg -n 'func Test.*' shortcuts/im/helpers_test.go | head -50

Repository: larksuite/cli

Length of output: 1671


🏁 Script executed:

rg -n 'normalizeMarkdownEmphasisSpacing|protectMarkdownInlineCode|scanInlineCodeSpans|unmatched.*backtick' shortcuts/im/helpers_test.go -A 10 -B 2

Repository: larksuite/cli

Length of output: 39


🏁 Script executed:

sed -n '319,375p' shortcuts/im/helpers_test.go

Repository: larksuite/cli

Length of output: 1475


🏁 Script executed:

rg -n 'backtick|inline.*code|\`' shortcuts/im/helpers_test.go | head -30

Repository: larksuite/cli

Length of output: 170


🏁 Script executed:

sed -n '906,940p' shortcuts/im/helpers.go

Repository: larksuite/cli

Length of output: 738


🏁 Script executed:

rg -n 'backtick|inline' shortcuts/im/helpers_test.go

Repository: larksuite/cli

Length of output: 210


🏁 Script executed:

grep -n 'Test.*[Mm]arkdown\|Test.*[Ee]mphasis\|Test.*[Cc]ode' shortcuts/im/helpers_test.go

Repository: larksuite/cli

Length of output: 575


🏁 Script executed:

sed -n '538,565p' shortcuts/im/helpers_test.go

Repository: larksuite/cli

Length of output: 1180


🏁 Script executed:

sed -n '506,560p' shortcuts/im/helpers_test.go

Repository: larksuite/cli

Length of output: 2282


🏁 Script executed:

rg -n 'unmatched' shortcuts/im/helpers_test.go

Repository: larksuite/cli

Length of output: 39


🏁 Script executed:

sed -n '822,850p' shortcuts/im/helpers.go

Repository: larksuite/cli

Length of output: 942


🏁 Script executed:

cat -n shortcuts/im/helpers.go | sed -n '908,940p'

Repository: larksuite/cli

Length of output: 812


🏁 Script executed:

python3 << 'EOF'
# Simulate scanInlineCodeSpans behavior with unmatched backticks
line = "hello ` world"  # unmatched opening backtick
spans = []
i = 0
while i < len(line):
    if line[i] != '`':
        i += 1
        continue
    start = i
    while i < len(line) and line[i] == '`':
        i += 1
    delim = line[start:i]
    j = i
    found = False
    while j <= len(line) - len(delim):
        if line[j] == '`':
            k = j
            while k < len(line) and line[k] == '`':
                k += 1
            if k - j == len(delim):
                spans.append((start, k))
                i = k
                found = True
                break
            j = k
        else:
            j += 1
    if not found:
        print(f"No match found for delimiter at {start}: {repr(delim)}")
        print(f"Loop exited with i={i}, continuing outer loop")

print(f"Final spans for '{line}': {spans}")
print()

# Test case: unmatched at start, then balanced backticks later
line2 = "start ` middle `code` end"
spans2 = []
i = 0
while i < len(line2):
    if line2[i] != '`':
        i += 1
        continue
    start = i
    while i < len(line2) and line2[i] == '`':
        i += 1
    delim = line2[start:i]
    j = i
    found = False
    while j <= len(line2) - len(delim):
        if line2[j] == '`':
            k = j
            while k < len(line2) and line2[k] == '`':
                k += 1
            if k - j == len(delim):
                spans2.append((start, k))
                i = k
                found = True
                break
            j = k
        else:
            j += 1
    if not found:
        print(f"No match found for delimiter at {start}: {repr(delim)}")
        print(f"Continuing outer loop from i={i}")

print(f"Final spans for '{line2}': {spans2}")
EOF

Repository: larksuite/cli

Length of output: 304


Add a test case for unmatched backticks in inline code scanning.

The scan logic correctly handles unmatched opening backticks: when no closing run is found, i remains at the post-run position, allowing the outer loop to continue and subsequent backticks on the same line to form spans independently. This fallback behavior matches CommonMark's intent of not requiring the first backtick run to be the code opener.

However, the existing test suite for wrapMarkdownAsPost covers only balanced delimiters (`code` and ``nested``). Add a test case with an unmatched backtick followed by a balanced pair (e.g., "text \` middle \`code\` end") to document this behavior and ensure the emphasis normalization skips over it correctly.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@shortcuts/im/helpers.go` around lines 908 - 940, Add a unit test to document
and assert the behavior of scanInlineCodeSpans/wrapMarkdownAsPost when an
unmatched backtick run appears before a balanced code span: create a test (e.g.,
TestWrapMarkdownAsPost_UnmatchedBacktickThenBalanced) that feeds a string like
"text ` middle `code` end" into wrapMarkdownAsPost (or directly into
scanInlineCodeSpans) and assert that the unmatched single backtick does not form
a span and the subsequent "`code`" is detected as a span (and that emphasis
normalization skips the unmatched run); locate code around scanInlineCodeSpans
and wrapMarkdownAsPost to add the test so it fails if the scanner incorrectly
treats the unmatched backtick as a code opener.


// normalizeEmphasisSpacingSegment rewrites simple *...*, **...**, and
// ***...*** spans within a single line segment so the payload carries no
// leading or trailing whitespace ("** bold **" becomes "**bold**").
//
// Only "simple" spans are touched: the opening and closing asterisk runs must
// have the same length (1, 2, or 3), the span must not butt up against
// word-like runes on the outside (see hasSimpleEmphasisBoundaries), and the
// trimmed payload must not itself contain '*'. Everything else is copied
// through verbatim.
//
// NOTE(review): a line-leading "* " is consumed as an emphasis opener when a
// later single '*' closes it, so a CommonMark list item such as
// "* Use the * operator" is rewritten to "*Use the* operator" and loses its
// bullet. The existing tests pin this behavior (e.g. "* italic * and
// *** both ***" is expected to normalize), so a list-marker guard would
// require revisiting those expectations first — confirm intended semantics
// before changing either.
func normalizeEmphasisSpacingSegment(seg string) string {
	// Fast path: no asterisk means nothing can change.
	if !strings.Contains(seg, "*") {
		return seg
	}

	var sb strings.Builder
	pos := 0
	for pos < len(seg) {
		// Locate the next candidate opening run of '*'.
		openStart, openEnd, ok := nextAsteriskRun(seg, pos)
		if !ok {
			sb.WriteString(seg[pos:])
			break
		}

		// Copy the plain text preceding the run unchanged.
		sb.WriteString(seg[pos:openStart])

		// Only 1-3 asterisks map to markdown emphasis; longer runs pass
		// through untouched.
		markerLen := openEnd - openStart
		if markerLen != 1 && markerLen != 2 && markerLen != 3 {
			sb.WriteString(seg[openStart:openEnd])
			pos = openEnd
			continue
		}

		// The closing run must match the opener's length exactly; otherwise
		// emit the opener literally and rescan from just after it.
		closeStart, closeEnd, ok := nextAsteriskRun(seg, openEnd)
		if !ok || closeEnd-closeStart != markerLen {
			sb.WriteString(seg[openStart:openEnd])
			pos = openEnd
			continue
		}
		// Word-like runes hugging the span from outside (hello**x**there)
		// indicate mid-word asterisks, not emphasis; leave the span intact.
		if !hasSimpleEmphasisBoundaries(seg, openStart, closeEnd) {
			sb.WriteString(seg[openStart:closeEnd])
			pos = closeEnd
			continue
		}

		// Trim edge whitespace from the payload; skip when there is nothing
		// to trim or the payload is blank / contains nested '*'.
		payload := seg[openEnd:closeStart]
		normalized, shouldNormalize := normalizeEmphasisPayload(payload)
		if !shouldNormalize {
			sb.WriteString(seg[openStart:closeEnd])
			pos = closeEnd
			continue
		}

		// Re-emit the span: original markers around the trimmed payload.
		marker := seg[openStart:openEnd]
		sb.WriteString(marker)
		sb.WriteString(normalized)
		sb.WriteString(marker)
		pos = closeEnd
	}
	return sb.String()
}
Comment on lines +942 to +992
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Edge case: list items containing a stray * get rewritten as emphasis.

normalizeEmphasisSpacingSegment doesn't distinguish a leading * that is a list-item marker from an opening emphasis flanker. For lines that start with * and have any later * whose closing boundary is non-word-like, the leading list marker is consumed as an opener and the line is rewritten.

Examples (single line each):

  • `* Use the * operator to multiply` → `*Use the* operator to multiply` (list item lost)
  • `* one item * here` → `*one item* here`

Bold/triple-asterisk variants inside such list items still work fine because the marker-length check filters them out — the issue is specific to lines that begin with * + space and contain another single *.

A simple guard would be to skip normalization when openStart == 0 (or when the byte before openStart is \n) AND the opener is followed by whitespace (CommonMark list-item shape ^\* ). The same pattern is what the existing test preserves embedded non emphatic spaced asterisks already relies on — the boundary check just needs the symmetric “opener immediately followed by whitespace” guard.

🛡️ Sketch of guard (add to `hasSimpleEmphasisBoundaries` or its caller)
// Reject list-item-style openers: line-start `*` followed by whitespace.
isLineStart := openStart == 0 || s[openStart-1] == '\n'
if isLineStart && openEnd-openStart == 1 && openEnd < len(s) {
    if r, _ := utf8.DecodeRuneInString(s[openEnd:]); unicode.IsSpace(r) {
        return false
    }
}
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@shortcuts/im/helpers.go` around lines 942 - 992, The function
normalizeEmphasisSpacingSegment incorrectly treats a leading list-item `* ` as
an emphasis opener; to fix, add a guard (either inside
hasSimpleEmphasisBoundaries or immediately after discovering an opener in
normalizeEmphasisSpacingSegment) that returns false / skips normalization when
openStart is at a line start (openStart == 0 or the byte before openStart is
'\n'), the marker length is 1 (openEnd-openStart == 1) and the byte/rune
immediately after openEnd is whitespace; update hasSimpleEmphasisBoundaries or
the caller logic around the nextAsteriskRun/openStart/openEnd check so
list-item-style `* ` openers are not treated as emphasis openers.


// nextAsteriskRun locates the first maximal run of '*' bytes at or after
// start, returning its [runStart, runEnd) bounds. ok is false when no
// asterisk remains in s[start:].
func nextAsteriskRun(s string, start int) (runStart, runEnd int, ok bool) {
	offset := strings.IndexByte(s[start:], '*')
	if offset < 0 {
		return 0, 0, false
	}
	runStart = start + offset
	runEnd = runStart + 1
	// Extend to the end of the contiguous run.
	for runEnd < len(s) && s[runEnd] == '*' {
		runEnd++
	}
	return runStart, runEnd, true
}

// hasSimpleEmphasisBoundaries reports whether the emphasis candidate spanning
// [openStart, closeEnd) is bordered by non-word runes (or the segment edges)
// on both sides. Spans glued to surrounding prose, as in "hello**world**there",
// fail the check and are left untouched by the normalizer.
func hasSimpleEmphasisBoundaries(s string, openStart, closeEnd int) bool {
	okBefore := openStart == 0
	if !okBefore {
		prev, _ := utf8.DecodeLastRuneInString(s[:openStart])
		okBefore = !isWordLikeRune(prev)
	}
	if !okBefore {
		return false
	}
	if closeEnd >= len(s) {
		return true
	}
	next, _ := utf8.DecodeRuneInString(s[closeEnd:])
	return !isWordLikeRune(next)
}

// isWordLikeRune reports whether r is a letter, a digit, or a Han character —
// runes that, when adjacent to an asterisk run, suggest the '*' belongs to
// surrounding prose rather than acting as a standalone emphasis marker.
func isWordLikeRune(r rune) bool {
	switch {
	case unicode.IsLetter(r), unicode.IsDigit(r):
		return true
	default:
		return unicode.Is(unicode.Han, r)
	}
}

// normalizeEmphasisPayload strips whitespace from both edges of an emphasis
// payload. The bool result reports whether the caller should adopt the
// trimmed text: it is false when there was no edge whitespace to remove,
// when the payload is blank, or when the trimmed text still contains '*'
// (nested/ambiguous emphasis is deliberately left alone).
func normalizeEmphasisPayload(payload string) (string, bool) {
	leftTrimmed := strings.TrimLeftFunc(payload, unicode.IsSpace)
	core := strings.TrimRightFunc(leftTrimmed, unicode.IsSpace)

	switch {
	case core == "":
		// Whitespace-only payload: nothing meaningful to emphasize.
		return payload, false
	case len(leftTrimmed) == len(payload) && len(core) == len(leftTrimmed):
		// No edge whitespace on either side — already well-formed.
		return payload, false
	case strings.Contains(core, "*"):
		// Nested asterisks make the span ambiguous; do not rewrite.
		return payload, false
	default:
		return core, true
	}
}

func shouldUseSegmentedPost(markdown string) bool {
protected, _ := protectMarkdownCodeBlocks(markdown)
return reBlankLineSeparator.MatchString(protected)
Expand Down
44 changes: 44 additions & 0 deletions shortcuts/im/helpers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,50 @@ func TestWrapMarkdownAsPost(t *testing.T) {
}
})

t.Run("normalizes malformed bold spacing", func(t *testing.T) {
got := wrapMarkdownAsPost("hello ** world **")
node := decodePostParagraphForTest(t, got, 0)
if node["text"] != "hello **world**" {
t.Fatalf("wrapMarkdownAsPost() text = %#v, want %q", node["text"], "hello **world**")
}
})

t.Run("normalizes malformed italic and bold italic spacing", func(t *testing.T) {
got := wrapMarkdownAsPost("* italic * and *** both ***")
node := decodePostParagraphForTest(t, got, 0)
if node["text"] != "*italic* and ***both***" {
t.Fatalf("wrapMarkdownAsPost() text = %#v, want %q", node["text"], "*italic* and ***both***")
}
})

t.Run("preserves inline code and fenced code spacing", func(t *testing.T) {
input := "code `** keep **` and prose ** fix **\n```md\n** keep fenced **\n```"
got := wrapMarkdownAsPost(input)
node := decodePostParagraphForTest(t, got, 0)
text, _ := node["text"].(string)
if text != "code `** keep **` and prose **fix**\n```md\n** keep fenced **\n```" {
t.Fatalf("wrapMarkdownAsPost() text = %#v", node["text"])
}
})

t.Run("preserves multiple inline code spans", func(t *testing.T) {
input := "`** keep **` prose ** fix ** ``* keep *`` and * okay *"
got := wrapMarkdownAsPost(input)
node := decodePostParagraphForTest(t, got, 0)
text, _ := node["text"].(string)
if text != "`** keep **` prose **fix** ``* keep *`` and *okay*" {
t.Fatalf("wrapMarkdownAsPost() text = %#v", node["text"])
}
})

t.Run("preserves embedded non emphatic spaced asterisks", func(t *testing.T) {
got := wrapMarkdownAsPost("hello** world **there")
node := decodePostParagraphForTest(t, got, 0)
if node["text"] != "hello** world **there" {
t.Fatalf("wrapMarkdownAsPost() text = %#v", node["text"])
}
})

t.Run("bare URL becomes a tag", func(t *testing.T) {
got := wrapMarkdownAsPost("see https://example.com/flow_id=abc_def done")
if !strings.Contains(got, `"tag":"a"`) {
Expand Down
1 change: 1 addition & 0 deletions skills/lark-im/references/lark-im-messages-reply.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ So `--markdown` is a convenience mode, not a full Markdown compatibility layer.
- Block spacing and line breaks may be normalized during conversion.
- Code blocks are preserved as code blocks.
- Excess blank lines are compressed.
- Emphasis with `*`, `**`, or `***` should not contain inner edge spaces. Use `**bold**` instead of `** bold **`, `** bold**`, or `**bold **`; use `*italic*` instead of `* italic *`, `* italic*`, or `*italic *`.
- Only remote `http://...`, `https://...`, or already-uploaded `img_xxx` Markdown images are kept reliably.
- Local paths in Markdown image syntax like `![x](./a.png)` are **not** auto-uploaded by `--markdown`.
- If remote Markdown image handling fails, that image is removed with a warning.
Expand Down
1 change: 1 addition & 0 deletions skills/lark-im/references/lark-im-messages-send.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ This means `--markdown` is convenient, but it is not a full-fidelity Markdown tr
- Block spacing and line breaks may be normalized during conversion.
- Code blocks are preserved as code blocks.
- Excess blank lines are compressed.
- Emphasis with `*`, `**`, or `***` should not contain inner edge spaces. Use `**bold**` instead of `** bold **`, `** bold**`, or `**bold **`; use `*italic*` instead of `* italic *`, `* italic*`, or `*italic *`.
- Only `http://...`, `https://...`, or already-uploaded `img_xxx` Markdown images are kept reliably.
- Local paths in Markdown image syntax like `![x](./a.png)` are **not** auto-uploaded by `--markdown`; they may be stripped during optimization.
- If remote Markdown image download/upload fails, that image is removed with a warning.
Expand Down
Loading