larksuite · fangshuyu-768 · Apr 21, 2026 · Apr 20, 2026 · Apr 21, 2026 · Apr 21, 2026
diff --git a/shortcuts/doc/docs_update.go b/shortcuts/doc/docs_update.go
@@ -5,6 +5,7 @@
 
 import (
 	"context"
+	"fmt"
 	"strings"
 
 	"github.com/larksuite/cli/shortcuts/common"
@@ -89,12 +90,22 @@
 			Set("mcp_tool", "update-doc").Set("args", args)
 	},
 	Execute: func(ctx context.Context, runtime *common.RuntimeContext) error {
+		mode := runtime.Str("mode")
+		markdown := runtime.Str("markdown")
+
+		// Static semantic checks run before the MCP call so users see
+		// warnings even if the subsequent request fails. They never block
+		// execution — the update still proceeds.
+		for _, w := range docsUpdateWarnings(mode, markdown) {
+			fmt.Fprintf(runtime.IO().ErrOut, "warning: %s\n", w)
+		}
+
 		args := map[string]interface{}{
 			"doc_id": runtime.Str("doc"),
-			"mode":   runtime.Str("mode"),
+			"mode":   mode,
 		}
-		if v := runtime.Str("markdown"); v != "" {
-			args["markdown"] = v
+		if markdown != "" {
+			args["markdown"] = markdown
 		}
 		if v := runtime.Str("selection-with-ellipsis"); v != "" {
 			args["selection_with_ellipsis"] = v

diff --git a/shortcuts/doc/docs_update_check.go b/shortcuts/doc/docs_update_check.go
@@ -0,0 +1,281 @@
+// Copyright (c) 2026 Lark Technologies Pte. Ltd.
+// SPDX-License-Identifier: MIT
+
+package doc
+
+import (
+	"regexp"
+	"strings"
+)
+
+// docsUpdateWarnings statically inspects a `docs +update` invocation and
+// returns human-readable warnings about CLI/MCP contract edges that tend
+// to surprise users. It never blocks anything itself: the result is
+// advisory and the caller decides what to do with it. A nil slice means
+// no check fired.
+//
+// The blank-line check skips fenced code blocks (```…``` and ~~~…~~~,
+// indented by at most 3 spaces per CommonMark §4.5). The emphasis check
+// additionally skips inline code spans and backslash-escaped emphasis
+// markers, so literal Markdown in samples or escaped prose does not
+// produce false positives.
+//
+// Checks, in the order their warnings are returned:
+//
+//  1. Paragraph break under a replace_* mode. replace_range / replace_all
+//     swap text inside a block and do not split it, so a blank line in
+//     the payload ends up as literal text in a single paragraph instead
+//     of separating two paragraphs.
+//
+//  2. Combined bold+italic. Lark does not round-trip it and keeps only
+//     one of the two emphases (usually italic). Six shapes are detected:
+//     ***text***   ___text___
+//     **_text_**   __*text*__
+//     _**text**_   *__text__*
+func docsUpdateWarnings(mode, markdown string) []string {
+	// Every check is evaluated; an empty string means "nothing to report".
+	results := []string{
+		checkDocsUpdateReplaceMultilineMarkdown(mode, markdown),
+		checkDocsUpdateBoldItalic(markdown),
+	}
+	var warnings []string
+	for _, msg := range results {
+		if msg != "" {
+			warnings = append(warnings, msg)
+		}
+	}
+	return warnings
+}
+
+// checkDocsUpdateReplaceMultilineMarkdown flags markdown that contains a
+// blank-line paragraph break outside fenced code blocks under a replace_*
+// mode. Blank lines inside code fences are literal content and don't
+// imply paragraph semantics, so they are deliberately ignored.
+func checkDocsUpdateReplaceMultilineMarkdown(mode, markdown string) string {
+	if mode != "replace_range" && mode != "replace_all" {
+		return ""
+	}
+	// A CR/LF-robust check: both "\n\n" and "\r\n\r\n" count as paragraph
+	// separators. We normalize line endings once before detection.
+	normalized := strings.ReplaceAll(markdown, "\r\n", "\n")
+	if !proseHasBlankLine(normalized) {
+		return ""
+	}
+	return "--mode=" + mode + " does not split a block into multiple paragraphs; " +
+		"the blank line in --markdown will render as literal text. " +
+		"For multiple paragraphs, use --mode=delete_range followed by --mode=insert_before."
+}
+
+// combinedEmphasisPatterns holds the six documented combined-emphasis shapes
+// that Lark downgrades to a single emphasis. Each entry pairs a compiled
+// regex with a short label naming the shape it detects (the label is
+// descriptive only; the entries are matched via their re field).
+//
+// Every regex is an alternation of two forms. The first form anchors the
+// payload with a non-space character right after the opener and another
+// right before the closer, so it can only match payloads of two or more
+// characters. Single-character payloads (e.g. `***X***`) are covered by the
+// second form. The \S anchors also mean that delimiters padded with
+// whitespace (e.g. `*** text ***`) are not matched.
+var combinedEmphasisPatterns = []struct {
+	shape string
+	re    *regexp.Regexp
+}{
+	// Bold+italic written with a triple run of one delimiter character.
+	{"***text***", regexp.MustCompile(`\*\*\*\S[^*]*?\S\*\*\*|\*\*\*\S\*\*\*`)},
+	{"___text___", regexp.MustCompile(`___\S[^_]*?\S___|___\S___`)},
+
+	// Bold (double delimiter) wrapping italic (single delimiter), mixed chars.
+	{"**_text_**", regexp.MustCompile(`\*\*_\S[^_*]*?\S_\*\*|\*\*_\S_\*\*`)},
+	{"__*text*__", regexp.MustCompile(`__\*\S[^_*]*?\S\*__|__\*\S\*__`)},
+
+	// Italic (single delimiter) wrapping bold (double delimiter), mixed chars.
+	{"_**text**_", regexp.MustCompile(`_\*\*\S[^_*]*?\S\*\*_|_\*\*\S\*\*_`)},
+	{"*__text__*", regexp.MustCompile(`\*__\S[^_*]*?\S__\*|\*__\S__\*`)},
+}
+
+// checkDocsUpdateBoldItalic returns a warning when markdown contains any
+// of the combined bold+italic shapes listed in combinedEmphasisPatterns,
+// and "" otherwise (including for empty input).
+//
+// Before matching, fenced code blocks and inline code spans are masked
+// and backslash-escaped emphasis markers are removed, so that literal
+// Markdown examples ("here is a `***keyword***` to flag") do not trigger
+// the warning. The message is the same whichever shape matched.
+func checkDocsUpdateBoldItalic(markdown string) string {
+	if markdown == "" {
+		return ""
+	}
+	text := stripMarkdownCodeRegions(markdown)
+	text = stripEscapedEmphasisMarkers(text)
+
+	matched := false
+	for i := range combinedEmphasisPatterns {
+		if combinedEmphasisPatterns[i].re.MatchString(text) {
+			matched = true
+			break // one hit is enough; the warning is not shape-specific
+		}
+	}
+	if !matched {
+		return ""
+	}
+	return "Lark does not support combined bold+italic markers " +
+		"(e.g. ***text***, ___text___, **_text_**, _**text**_, __*text*__, *__text__*); " +
+		"the emphasis will be downgraded to either bold or italic. " +
+		"Split into two separate emphases or drop one of them."
+}
+
+// proseHasBlankLine reports whether markdown contains a paragraph break:
+// one or more blank (empty or whitespace-only) lines that sit outside
+// fenced code blocks and have content on both sides. "Content" is any
+// non-blank prose line or a fence boundary, so a blank line right after
+// a closing fence or right before an opening fence still counts.
+//
+// Not counted:
+//   - blank lines inside ```...``` or ~~~...~~~ fences, which are code
+//     content rather than paragraph separators;
+//   - leading blank lines, which have nothing before them;
+//   - trailing blank lines (e.g. "text\n\n"), which have nothing after
+//     them and therefore do not separate two paragraphs.
+//
+// Lines are split on "\n"; callers that may receive CRLF input should
+// normalize it first.
+func proseHasBlankLine(markdown string) bool {
+	var (
+		fenceMarker  string // non-empty while inside a fenced code block
+		seenContent  bool   // a non-blank line or fence opener was seen
+		pendingBlank bool   // a blank prose line followed earlier content
+	)
+	for _, line := range strings.Split(markdown, "\n") {
+		if fenceMarker != "" {
+			// Inside a fence everything is code until the closer.
+			if isCodeFenceClose(line, fenceMarker) {
+				fenceMarker = ""
+			}
+			continue
+		}
+		if strings.TrimSpace(line) == "" {
+			// Only remember the blank if something precedes it; whether
+			// it is a real separator depends on what follows.
+			pendingBlank = seenContent
+			continue
+		}
+		if pendingBlank {
+			// Content, blank line(s), content again: a paragraph break.
+			return true
+		}
+		seenContent = true
+		// A fence opener is content too; it also switches to fence mode.
+		fenceMarker = codeFenceOpenMarker(line)
+	}
+	return false
+}
+
+// stripMarkdownCodeRegions returns markdown with fenced code blocks blanked
+// out and inline code spans replaced by whitespace of equivalent length.
+// Byte offsets outside the masked regions are preserved, so follow-on
+// regex matches still point at real prose positions.
+func stripMarkdownCodeRegions(markdown string) string {
+	lines := strings.Split(markdown, "\n")
+	inFence := false
+	var fenceMarker string
+	for i, line := range lines {
+		if inFence {
+			if isCodeFenceClose(line, fenceMarker) {
+				inFence = false
+				fenceMarker = ""
+			}
+			lines[i] = ""
+			continue
+		}
+		if marker := codeFenceOpenMarker(line); marker != "" {
+			inFence = true
+			fenceMarker = marker
+			lines[i] = ""
+			continue
+		}
+		lines[i] = maskInlineCodeSpans(line)
+	}
+	return strings.Join(lines, "\n")
+}
+
+// maskInlineCodeSpans returns line with every inline code span overwritten
+// by the same number of space bytes, so text outside the spans keeps its
+// byte position within the line. When no span is found the input is
+// returned unchanged.
+//
+// Spans come from scanInlineCodeSpans (markdown_fix.go), which is expected
+// to follow the CommonMark §6.1 matching-backtick-run rule, i.e. a span
+// opened by a double-backtick run such as `` `a`b` `` is one span.
+// NOTE(review): this assumes the spans are [start, end) byte offsets in
+// ascending, non-overlapping order — confirm against scanInlineCodeSpans.
+func maskInlineCodeSpans(line string) string {
+	spans := scanInlineCodeSpans(line)
+	if len(spans) == 0 {
+		return line
+	}
+	var out strings.Builder
+	out.Grow(len(line))
+	cursor := 0
+	for _, span := range spans {
+		start, end := span[0], span[1]
+		// Copy the prose in front of the span, then blank the span itself.
+		out.WriteString(line[cursor:start])
+		for n := end - start; n > 0; n-- {
+			out.WriteByte(' ')
+		}
+		cursor = end
+	}
+	out.WriteString(line[cursor:])
+	return out.String()
+}
+
+// stripEscapedEmphasisMarkers removes backslash-escaped '*' and '_' so the
+// bold/italic regexes don't treat literal sequences like `\***text***` as
+// real combined emphasis. CommonMark renders "\*" as a literal "*" with no
+// emphasis semantics; dropping the escape + its target from the detection
+// input keeps the heuristic aligned with what the renderer actually does.
+//
+// Known limitation: a doubled backslash escape ("\\" followed by a real
+// emphasis marker, e.g. `\\***text***`) renders as a literal backslash
+// followed by genuine combined emphasis, but this strip is not a proper
+// parser and will instead consume the second backslash as the opener for
+// another escape. That hides the real emphasis from the check, producing
+// a false negative. Practical impact is small (this shape is rare in the
+// kind of AI-Agent prompts we target) and the alternative — a full
+// CommonMark escape parser — is not worth the code surface here.
+func stripEscapedEmphasisMarkers(s string) string {
+	s = strings.ReplaceAll(s, `\*`, "")
+	s = strings.ReplaceAll(s, `\_`, "")
+	return s
+}
+
+// codeFenceOpenMarker returns the fence marker (e.g. "```" or "~~~~") if
+// line opens a fenced code block, otherwise "". The marker is the full
+// leading run of fence characters, so its length can later be compared
+// against a candidate closing fence.
+//
+// CommonMark §4.5 rules applied:
+//   - up to 3 leading spaces are tolerated; 4+ leading spaces (or a tab
+//     in the indentation) make the line indented code, not a fence;
+//   - the run must be at least three backticks or three tildes;
+//   - the info string after a backtick fence may not contain a backtick,
+//     so a one-line span such as "```x``` tail" is not an opener. Tilde
+//     fences have no such restriction.
+func codeFenceOpenMarker(line string) string {
+	body, ok := fenceIndentOK(line)
+	if !ok {
+		return ""
+	}
+	switch {
+	case strings.HasPrefix(body, "```"):
+		run := leadingRun(body, '`')
+		// Reject backticks in the info string; otherwise an inline code
+		// span written with triple backticks would be taken for an
+		// opener and hide the prose that follows it from the checks.
+		if strings.Contains(body[len(run):], "`") {
+			return ""
+		}
+		return run
+	case strings.HasPrefix(body, "~~~"):
+		return leadingRun(body, '~')
+	}
+	return ""
+}
+
+// isCodeFenceClose reports whether line closes a fence that was opened
+// with marker. Following CommonMark §4.5, a closing fence must:
+//   - be indented by 0..3 spaces (checked by fenceIndentOK),
+//   - use the same fence character as the opener,
+//   - be at least as long as the opener, and
+//   - be followed by nothing but whitespace (no info string).
+//
+// An empty marker never matches.
+func isCodeFenceClose(line, marker string) bool {
+	if len(marker) == 0 {
+		return false
+	}
+	rest, ok := fenceIndentOK(line)
+	if !ok {
+		return false
+	}
+	closer := leadingRun(rest, marker[0])
+	return len(closer) >= len(marker) &&
+		strings.TrimSpace(rest[len(closer):]) == ""
+}
+
+// fenceIndentOK checks whether line's indentation allows it to be a
+// CommonMark fence line and, if so, returns the line without that
+// indentation.
+//
+// Results:
+//   - (rest, true) when line starts with 0..3 spaces followed by a
+//     character that is not a tab; rest begins at that character;
+//   - ("", true) when line is empty or consists of 1..3 spaces only;
+//   - ("", false) when line starts with 4 or more spaces, or when a tab
+//     directly follows 0..3 leading spaces — such a line is indented
+//     code block content rather than a fence boundary.
+func fenceIndentOK(line string) (string, bool) {
+	// Count the leading spaces; tabs are deliberately not counted here.
+	indent := 0
+	for indent < len(line) && line[indent] == ' ' {
+		indent++
+	}
+	switch {
+	case indent > 3:
+		// Too deeply indented for a fence.
+		return "", false
+	case indent == len(line):
+		// Nothing but (at most three) spaces: valid, with empty content.
+		return "", true
+	case line[indent] == '\t':
+		// A tab in the indentation is treated as over-indentation.
+		return "", false
+	}
+	return line[indent:], true
+}
+
+// leadingRun returns the longest prefix of s that consists solely of the
+// byte c. The result is "" when s is empty or does not start with c, and
+// s itself when every byte of s equals c.
+func leadingRun(s string, c byte) string {
+	for i := 0; i < len(s); i++ {
+		if s[i] != c {
+			return s[:i]
+		}
+	}
+	return s
+}