larksuite · fangshuyu-768 · Apr 21, 2026 · Apr 20, 2026 · Apr 21, 2026
diff --git a/shortcuts/doc/docs_update_check.go b/shortcuts/doc/docs_update_check.go
@@ -14,18 +14,26 @@ import (
 // commonly surprise users; the update is still executed — callers
 // decide whether to stop at a warning.
 //
+// Both checks ignore fenced code blocks (```…``` and ~~~…~~~, with up
+// to 3 leading spaces per CommonMark §4.5). The bold+italic check also
+// ignores inline code spans and backslash-escaped emphasis markers, so
+// literal Markdown embedded in code samples or escaped prose does not
+// produce false positives.
+//
 // Warnings emitted (current):
 //
 //  1. replace_* modes do not split blocks. A Markdown payload containing
-//     a blank line (\n\n) implies the caller expects multiple paragraphs,
-//     but replace_range / replace_all only swap in-block text. The
-//     resulting block will contain the blank line as literal text and
-//     appear as a single paragraph in the UI.
+//     a blank line (\n\n) in prose implies the caller expects multiple
+//     paragraphs, but replace_range / replace_all only swap in-block
+//     text. The resulting block will contain the blank line as literal
+//     text and appear as a single paragraph in the UI.
 //
-//  2. Lark does not round-trip bold+italic. Markdown like ***text*** or
-//     **_text_** / _**text**_ is stored as only one of the two emphases
-//     (usually italic), silently dropping the other. The user wanted
-//     both; they will get one.
+//  2. Lark does not round-trip bold+italic. Six shapes are detected:
+//     ***text***   ___text___
+//     **_text_**   __*text*__
+//     _**text**_   *__text__*
+//     Lark stores only one of the two emphases (usually italic), silently
+//     dropping the other. The user wanted both; they will get one.
 func docsUpdateWarnings(mode, markdown string) []string {
 	var warnings []string
 	if w := checkDocsUpdateReplaceMultilineMarkdown(mode, markdown); w != "" {
@@ -38,46 +46,236 @@ func docsUpdateWarnings(mode, markdown string) []string {
 }
 
 // checkDocsUpdateReplaceMultilineMarkdown flags markdown that contains a
-// blank-line paragraph break under a replace_* mode. Returns an empty
-// string when the combination is fine.
+// blank-line paragraph break outside fenced code blocks under a replace_*
+// mode (blank lines inside fences are literal code, not paragraph breaks).
+// Returns the warning text, or "" when the combination is fine.
 func checkDocsUpdateReplaceMultilineMarkdown(mode, markdown string) string {
 	if mode != "replace_range" && mode != "replace_all" {
 		return ""
 	}
 	// A CR/LF-robust check: both "\n\n" and "\r\n\r\n" count as paragraph
-	// separators. We normalize line endings once before the substring match.
+	// separators. We normalize line endings once before detection.
 	normalized := strings.ReplaceAll(markdown, "\r\n", "\n")
-	if !strings.Contains(normalized, "\n\n") {
+	if !proseHasBlankLine(normalized) {
 		return ""
 	}
 	return "--mode=" + mode + " does not split a block into multiple paragraphs; " +
 		"the blank line in --markdown will render as literal text. " +
 		"For multiple paragraphs, use --mode=delete_range followed by --mode=insert_before."
 }
 
-// reBoldItalicTriple matches ***text*** with non-whitespace text between.
-var reBoldItalicTriple = regexp.MustCompile(`\*\*\*\S[^*]*?\S\*\*\*|\*\*\*\S\*\*\*`)
+// combinedEmphasisPatterns holds the six documented combined-emphasis shapes
+// that Lark downgrades to a single emphasis. Each entry pairs a regex with a
+// short shape label naming the form it detects. Every regex has two
+// alternatives because the first one (`\S…\S`) needs a payload of at least
+// two non-space characters; single-rune payloads (e.g. `***X***`)
+// take the second alternation.
+var combinedEmphasisPatterns = []struct {
+	shape string
+	re    *regexp.Regexp
+}{
+	// Bold+italic with a single delimiter char.
+	{"***text***", regexp.MustCompile(`\*\*\*\S[^*]*?\S\*\*\*|\*\*\*\S\*\*\*`)},
+	{"___text___", regexp.MustCompile(`___\S[^_]*?\S___|___\S___`)},
 
-// reBoldItalicUnderscoreInside matches **_text_** — bold wrapping an
-// underscore italic. Same downgrade issue in Lark.
-var reBoldItalicUnderscoreInside = regexp.MustCompile(`\*\*_\S[^_*]*?\S_\*\*|\*\*_\S_\*\*`)
+	// Bold wrapping italic (mixed delimiters, double marker outside).
+	{"**_text_**", regexp.MustCompile(`\*\*_\S[^_*]*?\S_\*\*|\*\*_\S_\*\*`)},
+	{"__*text*__", regexp.MustCompile(`__\*\S[^_*]*?\S\*__|__\*\S\*__`)},
 
-// reBoldItalicUnderscoreOutside matches _**text**_ — underscore italic
-// wrapping a bold.
-var reBoldItalicUnderscoreOutside = regexp.MustCompile(`_\*\*\S[^_*]*?\S\*\*_|_\*\*\S\*\*_`)
+	// Italic wrapping bold (mixed delimiters, double marker inside).
+	{"_**text**_", regexp.MustCompile(`_\*\*\S[^_*]*?\S\*\*_|_\*\*\S\*\*_`)},
+	{"*__text__*", regexp.MustCompile(`\*__\S[^_*]*?\S__\*|\*__\S__\*`)},
+}
 
 // checkDocsUpdateBoldItalic flags Markdown emphases that attempt to
-// combine bold and italic in a way Lark cannot represent.
+// combine bold and italic in a way Lark cannot represent, returning the
+// warning text or "" when none is found. Fenced code blocks, inline code
+// spans, and backslash-escaped markers are stripped first so literal
+// examples ("here is a `***keyword***` to flag") do not trigger it.
 func checkDocsUpdateBoldItalic(markdown string) string {
 	if markdown == "" {
 		return ""
 	}
-	if reBoldItalicTriple.MatchString(markdown) ||
-		reBoldItalicUnderscoreInside.MatchString(markdown) ||
-		reBoldItalicUnderscoreOutside.MatchString(markdown) {
-		return "Lark does not support combined bold+italic markers (***text***, **_text_**, _**text**_); " +
-			"the emphasis will be downgraded to either bold or italic. " +
-			"Split into two separate emphases or drop one of them."
+	sanitized := stripEscapedEmphasisMarkers(stripMarkdownCodeRegions(markdown))
+	for _, p := range combinedEmphasisPatterns {
+		if p.re.MatchString(sanitized) {
+			return "Lark does not support combined bold+italic markers " +
+				"(e.g. ***text***, ___text___, **_text_**, _**text**_, __*text*__, *__text__*); " +
+				"the emphasis will be downgraded to either bold or italic. " +
+				"Split into two separate emphases or drop one of them."
+		}
+	}
+	return ""
+}
+
+// proseHasBlankLine reports whether markdown has a blank (whitespace-only)
+// line in prose, i.e. outside any ```...``` or ~~~...~~~ fenced code block.
+// Blank lines inside a fence are code content rather than paragraph
+// separators, so they must not trip the "replace_* cannot split
+// paragraphs" warning.
+//
+// The very first and very last elements of the "\n"-split input are never
+// counted, so a lone leading newline or a single trailing newline at EOF
+// does not register as a paragraph break. An unclosed fence extends to the
+// end of the input.
+func proseHasBlankLine(markdown string) bool {
+	rows := strings.Split(markdown, "\n")
+	lastIdx := len(rows) - 1
+	openFence := "" // marker of the fence we are inside; "" while in prose
+	for idx, row := range rows {
+		if openFence != "" {
+			// Fence interior: only a matching closer is of interest.
+			if isCodeFenceClose(row, openFence) {
+				openFence = ""
+			}
+			continue
+		}
+		if openFence = codeFenceOpenMarker(row); openFence != "" {
+			continue
+		}
+		if idx > 0 && idx < lastIdx && strings.TrimSpace(row) == "" {
+			return true
+		}
+	}
+	return false
+}
+
+// stripMarkdownCodeRegions returns markdown with code removed from view:
+// every line of a fenced code block (opener and closer included) becomes
+// an empty line, and inline code spans on prose lines are overwritten with
+// spaces of equal length. The line count is unchanged, and columns on
+// prose lines still line up with the original text.
+func stripMarkdownCodeRegions(markdown string) string {
+	rows := strings.Split(markdown, "\n")
+	openFence := "" // marker of the fence we are inside; "" while in prose
+	for idx, row := range rows {
+		switch {
+		case openFence != "":
+			// Fence interior (or its closer): blank it, and leave the
+			// fence once the matching closer has been seen.
+			if isCodeFenceClose(row, openFence) {
+				openFence = ""
+			}
+			rows[idx] = ""
+		default:
+			if openFence = codeFenceOpenMarker(row); openFence != "" {
+				rows[idx] = "" // the opener line is code too
+			} else {
+				rows[idx] = maskInlineCodeSpans(row)
+			}
+		}
+	}
+	return strings.Join(rows, "\n")
+}
+
+// maskInlineCodeSpans overwrites each inline code span in line with one
+// space per byte. Spans come from scanInlineCodeSpans (markdown_fix.go, said
+// to follow CommonMark §6.1 backtick-run matching: `` `a`b` `` is one span).
+func maskInlineCodeSpans(line string) string {
+	spans := scanInlineCodeSpans(line)
+	if len(spans) == 0 {
+		return line
+	}
+	var out strings.Builder
+	cursor := 0
+	for _, span := range spans {
+		start, end := span[0], span[1]
+		out.WriteString(line[cursor:start])
+		out.WriteString(strings.Repeat(" ", end-start))
+		cursor = end
+	}
+	out.WriteString(line[cursor:])
+	return out.String()
+}
+
+// stripEscapedEmphasisMarkers drops backslash-escaped '*' and '_' so that
+// literal text such as `\***text***` is not mistaken for combined emphasis.
+// The input is scanned left to right and every backslash consumes exactly
+// one following byte, so `\\***text***` (an escaped backslash followed by
+// real emphasis) keeps its markers instead of losing the first '*'.
+func stripEscapedEmphasisMarkers(s string) string {
+	var sb strings.Builder
+	for i := 0; i < len(s); i++ {
+		if s[i] != '\\' || i+1 == len(s) {
+			sb.WriteByte(s[i])
+			continue
+		}
+		i++ // s[i] is now the escaped byte; the pair is consumed as a unit
+		if s[i] != '*' && s[i] != '_' {
+			sb.WriteString(s[i-1 : i+1]) // keep other escapes, `\\` included
+		}
+	}
+	return sb.String()
+}
+
+// codeFenceOpenMarker returns the fence marker (e.g. "```" or "~~~~") when
+// line opens a fenced code block, else "". Per CommonMark §4.5: at most 3
+// leading spaces, and no backtick in the info string of a backtick fence.
+func codeFenceOpenMarker(line string) string {
+	body, ok := fenceIndentOK(line)
+	if !ok {
+		return ""
+	}
+	switch {
+	case strings.HasPrefix(body, "```"): // "```x``` y" is a code span, not a fence
+		if run := leadingRun(body, '`'); !strings.Contains(body[len(run):], "`") {
+			return run
+		}
+	case strings.HasPrefix(body, "~~~"):
+		return leadingRun(body, '~')
 	}
 	return ""
 }
+
+// isCodeFenceClose reports whether line closes a fence opened with marker.
+// Following CommonMark §4.5, a closing fence must:
+//   - be indented by no more than 3 spaces (checked via fenceIndentOK),
+//   - start with a run of the opener's fence character,
+//   - have a run at least as long as the opener's, and
+//   - be followed by nothing but whitespace (no info string).
+//
+// An empty marker never matches, so callers that are not inside a fence
+// always get false.
+func isCodeFenceClose(line, marker string) bool {
+	body, indentOK := fenceIndentOK(line)
+	if marker == "" || !indentOK {
+		return false
+	}
+	// closer is the leading run of the opener's fence byte (maybe empty).
+	closer := leadingRun(body, marker[0])
+	trailing := body[len(closer):]
+	return len(closer) >= len(marker) && strings.TrimSpace(trailing) == ""
+}
+
+// fenceIndentOK reports whether line's indentation is permissible for a
+// CommonMark code fence, returning the line with that indentation removed.
+//
+// It returns (rest, true) when 0..3 spaces precede the first other byte,
+// ("", true) when line is empty or consists of 1..3 spaces only, and
+// ("", false) when line begins with 4 spaces or has a tab within its
+// leading indentation — such lines are indented code, not fence lines.
+func fenceIndentOK(line string) (string, bool) {
+	const maxIndent = 3
+	indent := 0
+	for indent < len(line) && indent <= maxIndent && line[indent] == ' ' {
+		indent++
+	}
+	switch {
+	case indent > maxIndent: // four leading spaces: too deep for a fence
+		return "", false
+	case indent == len(line): // nothing but (at most three) spaces
+		return "", true
+	case line[indent] == '\t': // a tab in the indent counts as too deep
+		return "", false
+	}
+	return line[indent:], true
+}
+
+// leadingRun returns the longest prefix of s made up solely of the byte c;
+// the result is "" when s is empty or does not start with c.
+func leadingRun(s string, c byte) string {
+	n := 0
+	for ; n < len(s) && s[n] == c; n++ {
+	}
+	return s[:n]
+}