From 03b9732f9197b8934821f4720e5dc6d61f0bf353 Mon Sep 17 00:00:00 2001 From: baiqing Date: Mon, 20 Apr 2026 15:59:29 +0800 Subject: [PATCH 1/3] feat(doc): add pre-write semantic warnings to docs +update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two static checks run before the MCP update-doc call: 1. replace_* + blank-line markdown: replace_range / replace_all only swap text inside an existing block — a \n\n in the payload will render as literal text, not a paragraph break. Hint to use delete_range + insert_before instead. 2. Combined bold+italic emphases (***text***, **_text_**, _**text**_) cannot round-trip through Lark and are silently downgraded to a single emphasis. Hint to split into two separate emphases. Both warnings go to stderr and never block the update — they inform, not gate. Adds table-driven tests for each check plus an aggregation test, and wires the checks into Execute right before CallMCPTool. Closes the first batch of items from the docs +update pitfalls review (Cases 1 and 5). --- shortcuts/doc/docs_update.go | 17 ++- shortcuts/doc/docs_update_check.go | 83 ++++++++++++ shortcuts/doc/docs_update_check_test.go | 169 ++++++++++++++++++++++++ 3 files changed, 266 insertions(+), 3 deletions(-) create mode 100644 shortcuts/doc/docs_update_check.go create mode 100644 shortcuts/doc/docs_update_check_test.go diff --git a/shortcuts/doc/docs_update.go b/shortcuts/doc/docs_update.go index ea80550ed..6f6b1fef8 100644 --- a/shortcuts/doc/docs_update.go +++ b/shortcuts/doc/docs_update.go @@ -5,6 +5,7 @@ package doc import ( "context" + "fmt" "strings" "github.com/larksuite/cli/shortcuts/common" @@ -89,12 +90,22 @@ var DocsUpdate = common.Shortcut{ Set("mcp_tool", "update-doc").Set("args", args) }, Execute: func(ctx context.Context, runtime *common.RuntimeContext) error { + mode := runtime.Str("mode") + markdown := runtime.Str("markdown") + + // Static semantic checks run before the MCP call so users see + // warnings even if the subsequent request fails. They never block + // execution — the update still proceeds. + for _, w := range docsUpdateWarnings(mode, markdown) { + fmt.Fprintf(runtime.IO().ErrOut, "warning: %s\n", w) + } + args := map[string]interface{}{ "doc_id": runtime.Str("doc"), - "mode": runtime.Str("mode"), + "mode": mode, } - if v := runtime.Str("markdown"); v != "" { - args["markdown"] = v + if markdown != "" { + args["markdown"] = markdown } if v := runtime.Str("selection-with-ellipsis"); v != "" { args["selection_with_ellipsis"] = v diff --git a/shortcuts/doc/docs_update_check.go b/shortcuts/doc/docs_update_check.go new file mode 100644 index 000000000..273f3e07d --- /dev/null +++ b/shortcuts/doc/docs_update_check.go @@ -0,0 +1,83 @@ +// Copyright (c) 2026 Lark Technologies Pte. Ltd. +// SPDX-License-Identifier: MIT + +package doc + +import ( + "regexp" + "strings" +) + +// docsUpdateWarnings returns a list of human-readable warnings for a +// `docs +update` invocation based on static analysis of the mode and +// Markdown payload. The warnings describe CLI/MCP contract edges that +// commonly surprise users; the update is still executed — callers +// decide whether to stop at a warning. +// +// Warnings emitted (current): +// +// 1. replace_* modes do not split blocks. A Markdown payload containing +// a blank line (\n\n) implies the caller expects multiple paragraphs, +// but replace_range / replace_all only swap in-block text. The +// resulting block will contain the blank line as literal text and +// appear as a single paragraph in the UI. +// +// 2. Lark does not round-trip bold+italic. Markdown like ***text*** or +// **_text_** / _**text**_ is stored as only one of the two emphases +// (usually italic), silently dropping the other. The user wanted +// both; they will get one. +func docsUpdateWarnings(mode, markdown string) []string { + var warnings []string + if w := checkDocsUpdateReplaceMultilineMarkdown(mode, markdown); w != "" { + warnings = append(warnings, w) + } + if w := checkDocsUpdateBoldItalic(markdown); w != "" { + warnings = append(warnings, w) + } + return warnings +} + +// checkDocsUpdateReplaceMultilineMarkdown flags markdown that contains a +// blank-line paragraph break under a replace_* mode. Returns an empty +// string when the combination is fine. +func checkDocsUpdateReplaceMultilineMarkdown(mode, markdown string) string { + if mode != "replace_range" && mode != "replace_all" { + return "" + } + // A CR/LF-robust check: both "\n\n" and "\r\n\r\n" count as paragraph + // separators. We normalize line endings once before the substring match. + normalized := strings.ReplaceAll(markdown, "\r\n", "\n") + if !strings.Contains(normalized, "\n\n") { + return "" + } + return "--mode=" + mode + " does not split a block into multiple paragraphs; " + + "the blank line in --markdown will render as literal text. " + + "For multiple paragraphs, use --mode=delete_range followed by --mode=insert_before." +} + +// reBoldItalicTriple matches ***text*** with non-whitespace text between. +var reBoldItalicTriple = regexp.MustCompile(`\*\*\*\S[^*]*?\S\*\*\*|\*\*\*\S\*\*\*`) + +// reBoldItalicUnderscoreInside matches **_text_** — bold wrapping an +// underscore italic. Same downgrade issue in Lark. +var reBoldItalicUnderscoreInside = regexp.MustCompile(`\*\*_\S[^_*]*?\S_\*\*|\*\*_\S_\*\*`) + +// reBoldItalicUnderscoreOutside matches _**text**_ — underscore italic +// wrapping a bold. +var reBoldItalicUnderscoreOutside = regexp.MustCompile(`_\*\*\S[^_*]*?\S\*\*_|_\*\*\S\*\*_`) + +// checkDocsUpdateBoldItalic flags Markdown emphases that attempt to +// combine bold and italic in a way Lark cannot represent. +func checkDocsUpdateBoldItalic(markdown string) string { + if markdown == "" { + return "" + } + if reBoldItalicTriple.MatchString(markdown) || + reBoldItalicUnderscoreInside.MatchString(markdown) || + reBoldItalicUnderscoreOutside.MatchString(markdown) { + return "Lark does not support combined bold+italic markers (***text***, **_text_**, _**text**_); " + + "the emphasis will be downgraded to either bold or italic. " + + "Split into two separate emphases or drop one of them." + } + return "" +} diff --git a/shortcuts/doc/docs_update_check_test.go b/shortcuts/doc/docs_update_check_test.go new file mode 100644 index 000000000..39dbe5336 --- /dev/null +++ b/shortcuts/doc/docs_update_check_test.go @@ -0,0 +1,169 @@ +// Copyright (c) 2026 Lark Technologies Pte. Ltd. +// SPDX-License-Identifier: MIT + +package doc + +import ( + "strings" + "testing" +) + +func TestCheckDocsUpdateReplaceMultilineMarkdown(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + mode string + markdown string + wantHint bool + }{ + { + name: "replace_range with blank line emits hint", + mode: "replace_range", + markdown: "new paragraph\n\nsecond paragraph", + wantHint: true, + }, + { + name: "replace_all with blank line emits hint", + mode: "replace_all", + markdown: "first\n\nsecond", + wantHint: true, + }, + { + name: "replace_range single paragraph is fine", + mode: "replace_range", + markdown: "just a single paragraph of text", + wantHint: false, + }, + { + name: "single newline is not a paragraph break", + mode: "replace_range", + markdown: "line one\nline two", + wantHint: false, + }, + { + name: "crlf paragraph break is also detected", + mode: "replace_range", + markdown: "first\r\n\r\nsecond", + wantHint: true, + }, + { + name: "other modes are not flagged", + mode: "insert_before", + markdown: "first\n\nsecond", + wantHint: false, + }, + { + name: "append mode is not flagged", + mode: "append", + markdown: "first\n\nsecond", + wantHint: false, + }, + { + name: "empty markdown is fine", + mode: "replace_range", + markdown: "", + wantHint: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + got := checkDocsUpdateReplaceMultilineMarkdown(tt.mode, tt.markdown) + hasHint := got != "" + if hasHint != tt.wantHint { + t.Fatalf("checkDocsUpdateReplaceMultilineMarkdown(%q, %q) = %q, wantHint=%v", + tt.mode, tt.markdown, got, tt.wantHint) + } + if tt.wantHint && !strings.Contains(got, "delete_range") { + t.Errorf("hint should suggest delete_range/insert_before remediation, got: %s", got) + } + }) + } +} + +func TestCheckDocsUpdateBoldItalic(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + input string + wantHint bool + }{ + { + name: "triple asterisks flagged", + input: "a ***key insight*** here", + wantHint: true, + }, + { + name: "triple asterisks single char flagged", + input: "a ***X*** here", + wantHint: true, + }, + { + name: "bold wrapping underscore italic flagged", + input: "note: **_important_** detail", + wantHint: true, + }, + { + name: "underscore wrapping double asterisk flagged", + input: "note: _**important**_ detail", + wantHint: true, + }, + { + name: "plain bold is fine", + input: "this is **bold** text", + wantHint: false, + }, + { + name: "plain italic is fine", + input: "this is *italic* or _italic_ text", + wantHint: false, + }, + { + name: "horizontal rule is not flagged", + input: "paragraph\n\n---\n\nnext", + wantHint: false, + }, + { + name: "bold followed by italic with space is not flagged", + input: "**bold** and *italic*", + wantHint: false, + }, + { + name: "empty input is fine", + input: "", + wantHint: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + got := checkDocsUpdateBoldItalic(tt.input) + hasHint := got != "" + if hasHint != tt.wantHint { + t.Fatalf("checkDocsUpdateBoldItalic(%q) = %q, wantHint=%v", tt.input, got, tt.wantHint) + } + }) + } +} + +func TestDocsUpdateWarningsAggregates(t *testing.T) { + t.Parallel() + + // Both flags trigger: replace_range with blank line AND triple-asterisk. + warnings := docsUpdateWarnings("replace_range", "***opening***\n\nsecond paragraph") + if len(warnings) != 2 { + t.Fatalf("expected 2 warnings, got %d: %v", len(warnings), warnings) + } +} + +func TestDocsUpdateWarningsEmpty(t *testing.T) { + t.Parallel() + + // Clean markdown in a non-replace mode produces zero warnings. + warnings := docsUpdateWarnings("insert_before", "plain paragraph text") + if len(warnings) != 0 { + t.Fatalf("expected no warnings, got: %v", warnings) + } +} From 199aadb320e3d7ee8454deb9d377d22cf913bb6a Mon Sep 17 00:00:00 2001 From: fangshuyu-768 Date: Tue, 21 Apr 2026 11:21:32 +0800 Subject: [PATCH 2/3] fix(doc): exclude code regions and escaped markers from docs +update checks (#578) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(doc): exclude code regions and escaped markers from docs +update checks Addresses the three review comments on #569: the blank-line paragraph check and the bold+italic emphasis check both operate on the raw markdown string, so fenced code blocks / inline code spans / literal escaped markers produce false-positive warnings on content users expect to pass through verbatim. Changes: - Add proseHasBlankLine(): fence-aware detector that returns true only when a blank line sits outside of ```...``` or ~~~...~~~ regions. Replaces the raw strings.Contains("\n\n") check in checkDocsUpdateReplaceMultilineMarkdown. - Add stripMarkdownCodeRegions(): blanks out fenced code lines and masks inline code spans (via scanInlineCodeSpans from markdown_fix.go) with equal-length whitespace so byte offsets outside the stripped regions are preserved. - Add stripEscapedEmphasisMarkers(): removes "\*" and "\_" so literal sequences like "\***text***" — which CommonMark renders as a literal asterisk plus bold — don't match the combined bold+italic regex. - Wire both helpers into checkDocsUpdateBoldItalic(): the regex now runs on stripEscapedEmphasisMarkers(stripMarkdownCodeRegions(markdown)), so code samples and escaped markers are sanitized away before detection. Shared fence-parsing helpers (codeFenceOpenMarker, isCodeFenceClose, leadingRun) are kept local to this file to avoid touching files outside the scope of the reviewed PR. If a future change wants to reuse them across the doc package, they can be promoted then. Tests: - TestCheckDocsUpdateReplaceMultilineMarkdown: add 4 negative/positive cases — blank line inside backtick and tilde fences (no flag), blank line in prose while fence also has blanks (flag wins), fenced code with no blank lines (no flag). - TestCheckDocsUpdateBoldItalic: add 9 cases — ***text*** / **_text_** / _**text**_ inside fenced code (backtick and tilde), inside inline code spans, and escaped \***text*** / \*\*_text_\*\* (none flagged); plus two positive cases to verify the strip doesn't over-sanitize (real emphasis in prose still fires when inline/fenced code is nearby). * fix(doc): close CommonMark gaps and add three more combined-emphasis shapes Self-review of the first commit turned up three issues: - isCodeFenceClose was strict on exact marker length. Per CommonMark §4.5, a closing fence must be at least as long as the opener, not exactly the same length. A 3-backtick open legitimately closed by a 4-backtick closer (used to embed triple-backticks inside the code sample) was left open-ended, causing the rest of the document to be treated as code and both checks to silently skip it. - Both fence helpers accepted any amount of leading whitespace because they ran on strings.TrimSpace(line). CommonMark allows 0..3 leading spaces before a fence marker; 4+ spaces (or any tab in leading position, which expands to 4 columns) makes the line indented code block content, not a fence open/close. Indented fence-like lines now correctly remain prose and blank lines around them are detected. - The bold/italic check only covered three of the six documented combined-emphasis shapes. Added ___text___, __*text*__, and *__text__* so parity with the asterisk variants is complete. The regex set is now table-driven (combinedEmphasisPatterns) to make adding future shapes a one-line change. Implementation changes: - New fenceIndentOK(line) helper: returns (body, true) for 0..3 leading spaces with no tabs, else (_, false). Used by both codeFenceOpenMarker and isCodeFenceClose. - isCodeFenceClose now counts the fence-char run and accepts any run length >= len(marker), with trailing whitespace only. - checkDocsUpdateBoldItalic replaced three named var regexes with a table of six {shape, re} entries and a single early-exit loop. - Updated docsUpdateWarnings top docstring to list all six shapes. - Noted the known limitation of stripEscapedEmphasisMarkers around doubled backslash escapes ("\\***text***"), which is a false negative we accept in exchange for keeping this a simple string replace. Test additions (docs_update_check_test.go): - Fence close: longer-marker close correctly ends fence; real prose blank after a longer-close fence is still detected. - Indentation: 4-space indented fence-like line is not a fence open, so a surrounding blank line still flags; tab-indented variant same; 3-space indented fence is still a real fence. - New shapes: ___text___ positive + all three negative-guards (fenced code, inline code, escaped); __*text*__ and *__text__* positive + fenced/inline negative-guards; plus two composition tests to ensure the strip does not over-sanitize across the six-regex alternative set. All 53 sub-tests in this file pass; go vet and gofmt are clean. --------- Co-authored-by: fangshuyu-768 --- shortcuts/doc/docs_update_check.go | 252 +++++++++++++++++++++--- shortcuts/doc/docs_update_check_test.go | 206 +++++++++++++++++++ 2 files changed, 431 insertions(+), 27 deletions(-) diff --git a/shortcuts/doc/docs_update_check.go b/shortcuts/doc/docs_update_check.go index 273f3e07d..cf71c1012 100644 --- a/shortcuts/doc/docs_update_check.go +++ b/shortcuts/doc/docs_update_check.go @@ -14,18 +14,26 @@ import ( // commonly surprise users; the update is still executed — callers // decide whether to stop at a warning. // +// Both checks ignore fenced code blocks (```…``` and ~~~…~~~, with up +// to 3 leading spaces per CommonMark §4.5), inline code spans, and +// backslash-escaped emphasis markers so that literal Markdown content +// embedded in code samples or escaped prose does not produce false +// positives. +// // Warnings emitted (current): // // 1. replace_* modes do not split blocks. A Markdown payload containing -// a blank line (\n\n) implies the caller expects multiple paragraphs, -// but replace_range / replace_all only swap in-block text. The -// resulting block will contain the blank line as literal text and -// appear as a single paragraph in the UI. +// a blank line (\n\n) in prose implies the caller expects multiple +// paragraphs, but replace_range / replace_all only swap in-block +// text. The resulting block will contain the blank line as literal +// text and appear as a single paragraph in the UI. // -// 2. Lark does not round-trip bold+italic. Markdown like ***text*** or -// **_text_** / _**text**_ is stored as only one of the two emphases -// (usually italic), silently dropping the other. The user wanted -// both; they will get one. +// 2. Lark does not round-trip bold+italic. Six shapes are detected: +// ***text*** ___text___ +// **_text_** __*text*__ +// _**text**_ *__text__* +// Lark stores only one of the two emphases (usually italic), silently +// dropping the other. The user wanted both; they will get one. func docsUpdateWarnings(mode, markdown string) []string { var warnings []string if w := checkDocsUpdateReplaceMultilineMarkdown(mode, markdown); w != "" { @@ -38,16 +46,17 @@ func docsUpdateWarnings(mode, markdown string) []string { } // checkDocsUpdateReplaceMultilineMarkdown flags markdown that contains a -// blank-line paragraph break under a replace_* mode. Returns an empty -// string when the combination is fine. +// blank-line paragraph break outside fenced code blocks under a replace_* +// mode. Blank lines inside code fences are literal content and don't +// imply paragraph semantics, so they are deliberately ignored. func checkDocsUpdateReplaceMultilineMarkdown(mode, markdown string) string { if mode != "replace_range" && mode != "replace_all" { return "" } // A CR/LF-robust check: both "\n\n" and "\r\n\r\n" count as paragraph - // separators. We normalize line endings once before the substring match. + // separators. We normalize line endings once before detection. normalized := strings.ReplaceAll(markdown, "\r\n", "\n") - if !strings.Contains(normalized, "\n\n") { + if !proseHasBlankLine(normalized) { return "" } return "--mode=" + mode + " does not split a block into multiple paragraphs; " + @@ -55,29 +64,218 @@ func checkDocsUpdateReplaceMultilineMarkdown(mode, markdown string) string { "For multiple paragraphs, use --mode=delete_range followed by --mode=insert_before." } -// reBoldItalicTriple matches ***text*** with non-whitespace text between. -var reBoldItalicTriple = regexp.MustCompile(`\*\*\*\S[^*]*?\S\*\*\*|\*\*\*\S\*\*\*`) +// combinedEmphasisPatterns holds the six documented combined-emphasis shapes +// that Lark downgrades to a single emphasis. Each entry pairs a regex with a +// short shape label for the warning message. The two forms per shape (with +// and without `[^…]*?`) are there because the lazy quantifier needs at least +// one non-delimiter character to match; single-rune payloads (e.g. `***X***`) +// take the second alternation. +var combinedEmphasisPatterns = []struct { + shape string + re *regexp.Regexp +}{ + // Bold+italic with a single delimiter char. + {"***text***", regexp.MustCompile(`\*\*\*\S[^*]*?\S\*\*\*|\*\*\*\S\*\*\*`)}, + {"___text___", regexp.MustCompile(`___\S[^_]*?\S___|___\S___`)}, -// reBoldItalicUnderscoreInside matches **_text_** — bold wrapping an -// underscore italic. Same downgrade issue in Lark. -var reBoldItalicUnderscoreInside = regexp.MustCompile(`\*\*_\S[^_*]*?\S_\*\*|\*\*_\S_\*\*`) + // Bold wrapping italic (asterisk outside). + {"**_text_**", regexp.MustCompile(`\*\*_\S[^_*]*?\S_\*\*|\*\*_\S_\*\*`)}, + {"__*text*__", regexp.MustCompile(`__\*\S[^_*]*?\S\*__|__\*\S\*__`)}, -// reBoldItalicUnderscoreOutside matches _**text**_ — underscore italic -// wrapping a bold. -var reBoldItalicUnderscoreOutside = regexp.MustCompile(`_\*\*\S[^_*]*?\S\*\*_|_\*\*\S\*\*_`) + // Italic wrapping bold (asterisk inside). + {"_**text**_", regexp.MustCompile(`_\*\*\S[^_*]*?\S\*\*_|_\*\*\S\*\*_`)}, + {"*__text__*", regexp.MustCompile(`\*__\S[^_*]*?\S__\*|\*__\S__\*`)}, +} // checkDocsUpdateBoldItalic flags Markdown emphases that attempt to -// combine bold and italic in a way Lark cannot represent. +// combine bold and italic in a way Lark cannot represent. Fenced code +// blocks, inline code spans, and backslash-escaped emphasis markers are +// stripped first so that literal markdown examples ("here is a +// `***keyword***` to flag") do not trigger the warning. func checkDocsUpdateBoldItalic(markdown string) string { if markdown == "" { return "" } - if reBoldItalicTriple.MatchString(markdown) || - reBoldItalicUnderscoreInside.MatchString(markdown) || - reBoldItalicUnderscoreOutside.MatchString(markdown) { - return "Lark does not support combined bold+italic markers (***text***, **_text_**, _**text**_); " + - "the emphasis will be downgraded to either bold or italic. " + - "Split into two separate emphases or drop one of them." + sanitized := stripEscapedEmphasisMarkers(stripMarkdownCodeRegions(markdown)) + for _, p := range combinedEmphasisPatterns { + if p.re.MatchString(sanitized) { + return "Lark does not support combined bold+italic markers " + + "(e.g. ***text***, ___text___, **_text_**, _**text**_, __*text*__, *__text__*); " + + "the emphasis will be downgraded to either bold or italic. " + + "Split into two separate emphases or drop one of them." + } + } + return "" +} + +// proseHasBlankLine reports whether markdown contains a blank line outside +// of fenced code blocks. Blank lines inside ```...``` or ~~~...~~~ fences +// are code content, not paragraph separators, and must not trip the +// "replace_* cannot split paragraphs" warning. +// +// A blank line counts only when it sits between two non-blank boundaries +// (other prose, or a fence open/close). A trailing empty line at EOF is +// not treated as "\n\n". +func proseHasBlankLine(markdown string) bool { + lines := strings.Split(markdown, "\n") + inFence := false + var fenceMarker string + for i, line := range lines { + if inFence { + if isCodeFenceClose(line, fenceMarker) { + inFence = false + fenceMarker = "" + } + continue + } + if marker := codeFenceOpenMarker(line); marker != "" { + inFence = true + fenceMarker = marker + continue + } + if strings.TrimSpace(line) == "" && i > 0 && i+1 < len(lines) { + return true + } + } + return false +} + +// stripMarkdownCodeRegions returns markdown with fenced code blocks blanked +// out and inline code spans replaced by whitespace of equivalent length. +// Byte offsets outside the masked regions are preserved, so follow-on +// regex matches still point at real prose positions. +func stripMarkdownCodeRegions(markdown string) string { + lines := strings.Split(markdown, "\n") + inFence := false + var fenceMarker string + for i, line := range lines { + if inFence { + if isCodeFenceClose(line, fenceMarker) { + inFence = false + fenceMarker = "" + } + lines[i] = "" + continue + } + if marker := codeFenceOpenMarker(line); marker != "" { + inFence = true + fenceMarker = marker + lines[i] = "" + continue + } + lines[i] = maskInlineCodeSpans(line) + } + return strings.Join(lines, "\n") +} + +// maskInlineCodeSpans replaces the byte ranges of any inline code spans in +// line with space characters of equal length. Uses scanInlineCodeSpans from +// markdown_fix.go, which implements the CommonMark §6.1 matching-backtick-run +// rule (so “ `a`b` “ is a single span). +func maskInlineCodeSpans(line string) string { + spans := scanInlineCodeSpans(line) + if len(spans) == 0 { + return line + } + var sb strings.Builder + pos := 0 + for _, loc := range spans { + sb.WriteString(line[pos:loc[0]]) + sb.WriteString(strings.Repeat(" ", loc[1]-loc[0])) + pos = loc[1] + } + sb.WriteString(line[pos:]) + return sb.String() +} + +// stripEscapedEmphasisMarkers removes backslash-escaped '*' and '_' so the +// bold/italic regexes don't treat literal sequences like `\***text***` as +// real combined emphasis. CommonMark renders "\*" as a literal "*" with no +// emphasis semantics; dropping the escape + its target from the detection +// input keeps the heuristic aligned with what the renderer actually does. +// +// Known limitation: a doubled backslash escape ("\\" followed by a real +// emphasis marker, e.g. `\\***text***`) renders as a literal backslash +// followed by genuine combined emphasis, but this strip is not a proper +// parser and will instead consume the second backslash as the opener for +// another escape. That hides the real emphasis from the check, producing +// a false negative. Practical impact is small (this shape is rare in the +// kind of AI-Agent prompts we target) and the alternative — a full +// CommonMark escape parser — is not worth the code surface here. +func stripEscapedEmphasisMarkers(s string) string { + s = strings.ReplaceAll(s, `\*`, "") + s = strings.ReplaceAll(s, `\_`, "") + return s +} + +// codeFenceOpenMarker returns the fence marker (e.g. "```" or "~~~~") if +// line opens a fenced code block, otherwise "". Applies CommonMark §4.5 +// rules: up to 3 leading spaces are tolerated; 4+ leading spaces (or any +// leading tab, which expands to 4 columns) make the line an indented code +// block rather than a fence. +func codeFenceOpenMarker(line string) string { + body, ok := fenceIndentOK(line) + if !ok { + return "" + } + switch { + case strings.HasPrefix(body, "```"): + return leadingRun(body, '`') + case strings.HasPrefix(body, "~~~"): + return leadingRun(body, '~') } return "" } + +// isCodeFenceClose reports whether line closes a fence opened with marker. +// Per CommonMark §4.5 the closer must use the same fence character, be at +// least as long as the opener, sit within 0..3 leading spaces, and carry +// no info-string text. +func isCodeFenceClose(line, marker string) bool { + if marker == "" { + return false + } + body, ok := fenceIndentOK(line) + if !ok { + return false + } + fenceChar := marker[0] + run := leadingRun(body, fenceChar) + if len(run) < len(marker) { + return false + } + return strings.TrimSpace(body[len(run):]) == "" +} + +// fenceIndentOK returns (bodyWithoutLeadingSpaces, true) when line has +// 0..3 leading spaces and no leading tab — i.e. the indentation is +// permissible for a CommonMark fence. Returns ("", false) otherwise +// (4+ leading spaces or any tab), meaning the line must be treated as +// indented code block content rather than a fence boundary. +func fenceIndentOK(line string) (string, bool) { + for i := 0; i < len(line) && i < 4; i++ { + switch line[i] { + case ' ': + continue + case '\t': + return "", false + default: + return line[i:], true + } + } + // Reached index 4 without hitting a non-space character: too indented. + if len(line) >= 4 { + return "", false + } + // Line shorter than 4 chars and all spaces — still valid (empty content). + return "", true +} + +// leadingRun returns the longest prefix of s made up of the byte c. +func leadingRun(s string, c byte) string { + i := 0 + for i < len(s) && s[i] == c { + i++ + } + return s[:i] +} diff --git a/shortcuts/doc/docs_update_check_test.go b/shortcuts/doc/docs_update_check_test.go index 39dbe5336..6208e0540 100644 --- a/shortcuts/doc/docs_update_check_test.go +++ b/shortcuts/doc/docs_update_check_test.go @@ -65,6 +65,78 @@ func TestCheckDocsUpdateReplaceMultilineMarkdown(t *testing.T) { markdown: "", wantHint: false, }, + { + // The check must ignore blank lines inside fenced code; otherwise + // a user replacing one block with a legitimate code sample that + // contains blank lines would see a spurious warning. + name: "blank line inside backtick fenced code is not flagged", + mode: "replace_range", + markdown: "```\nline1\n\nline2\n```", + wantHint: false, + }, + { + name: "blank line inside tilde fenced code is not flagged", + mode: "replace_range", + markdown: "~~~\ncode line one\n\ncode line two\n~~~", + wantHint: false, + }, + { + // Mixed prose + fenced code: any blank line in prose still wins, + // even if the fenced content also contains blanks. + name: "blank line in prose outside fence still flags even when fence has blanks", + mode: "replace_range", + markdown: "first paragraph\n\nsecond paragraph\n\n```\ncode\n\nmore\n```", + wantHint: true, + }, + { + // Fenced code with no blank lines inside must not trip on the + // fence markers themselves. + name: "fenced code with no blank lines does not flag", + mode: "replace_range", + markdown: "prose before\n```go\nfmt.Println(\"hi\")\n```\nprose after", + wantHint: false, + }, + { + // CommonMark §4.5: the closing fence must be ≥ opening fence length. + // A 4-backtick close for a 3-backtick open is a legitimate way to + // embed triple-backticks in a code sample; the check must see the + // fence as properly closed and not treat the rest of the document + // as still-inside-fence. + name: "longer close marker closes fence correctly", + mode: "replace_range", + markdown: "```\nsome code\n````\n\nprose paragraph after", + wantHint: true, // the blank line AFTER the fence is real prose + }, + { + name: "longer close marker still hides blank line inside fence", + mode: "replace_range", + markdown: "```\nbefore\n\nafter\n````", + wantHint: false, + }, + { + // 4+ leading spaces make the line an indented code block, not a + // fence open. The "fence"-looking line is code content; the + // surrounding blank must still be detected. + name: "four-space indented fence-like line is not a fence open", + mode: "replace_range", + markdown: "first paragraph\n\n ```\n code\n ```", + wantHint: true, + }, + { + // A tab in the leading whitespace is always ≥4 columns and thus + // forces indented-code-block semantics. + name: "tab-indented fence-like line is not a fence open", + mode: "replace_range", + markdown: "first paragraph\n\n\t```\n\tcode\n\t```", + wantHint: true, + }, + { + // 3 leading spaces is still within the fence-tolerance window. + name: "three-space indented fence is still a fence", + mode: "replace_range", + markdown: " ```\ncode\n\nmore\n ```", + wantHint: false, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -135,6 +207,140 @@ func TestCheckDocsUpdateBoldItalic(t *testing.T) { input: "", wantHint: false, }, + { + // The emphasis check must not fire on literal Markdown samples + // inside a fenced code block — the canonical use case is docs + // authors pasting tutorials that demonstrate these exact patterns. + name: "triple asterisks inside backtick fenced code is not flagged", + input: "example:\n```\nthe shape ***keyword*** downgrades\n```", + wantHint: false, + }, + { + name: "underscore-bold inside fenced code is not flagged", + input: "example:\n```markdown\nuse **_strong italic_** carefully\n```", + wantHint: false, + }, + { + name: "bold-underscore inside fenced code is not flagged", + input: "example:\n~~~\n_**outside-underscore**_ is a bad shape\n~~~", + wantHint: false, + }, + { + name: "triple asterisks inside inline code span is not flagged", + input: "the literal `***text***` marker is just a sample", + wantHint: false, + }, + { + name: "underscore-bold inside inline code is not flagged", + input: "the shape `**_italic_**` would downgrade, but only if it were real", + wantHint: false, + }, + { + name: "escaped triple asterisks rendered as literal text is not flagged", + input: `the literal \***text*** with escaped opener`, + wantHint: false, + }, + { + name: "escaped bold inside underscore-italic is not flagged", + input: `shape \*\*_text_\*\* is literal, not emphasis`, + wantHint: false, + }, + { + // Real emphasis outside the code span must still be detected — + // the strip step must not over-sanitize. + name: "real triple asterisks outside inline code still flags", + input: "real ***strong*** and literal `***keyword***` — the first one counts", + wantHint: true, + }, + { + name: "real triple asterisks outside fenced code still flags", + input: "real ***strong***\n\n```\nliteral ***keyword*** in code\n```", + wantHint: true, + }, + // --- Triple-underscore combined emphasis: ___text___ --- + { + name: "triple underscores flagged", + input: "a ___key insight___ here", + wantHint: true, + }, + { + name: "triple underscores single char flagged", + input: "a ___X___ here", + wantHint: true, + }, + { + name: "triple underscores inside fenced code not flagged", + input: "sample:\n```\nuse ___keyword___ carefully\n```", + wantHint: false, + }, + { + name: "triple underscores inside inline code not flagged", + input: "the literal `___phrase___` marker", + wantHint: false, + }, + { + name: "escaped triple underscores not flagged", + input: `literal \___phrase___ with escaped opener`, + wantHint: false, + }, + // --- Underscore-bold wrapping asterisk-italic: __*text*__ --- + { + name: "underscore-bold wrapping asterisk-italic flagged", + input: "note: __*important*__ text", + wantHint: true, + }, + { + name: "underscore-bold wrapping asterisk-italic inside fenced code not flagged", + input: "```\nnote: __*important*__ sample\n```", + wantHint: false, + }, + { + name: "underscore-bold wrapping asterisk-italic inside inline code not flagged", + input: "literal `__*important*__` marker", + wantHint: false, + }, + // --- Asterisk-italic wrapping underscore-bold: *__text__* --- + { + name: "asterisk-italic wrapping underscore-bold flagged", + input: "note: *__phrase__* text", + wantHint: true, + }, + { + name: "asterisk-italic wrapping underscore-bold inside fenced code not flagged", + input: "```md\nnote: *__phrase__* sample\n```", + wantHint: false, + }, + // --- Positive tests: real emphasis in prose coexisting with fake in code --- + { + // Underscore-variant in prose must still fire when an asterisk + // variant appears inside a code span — verifies the strip does + // not over-sanitize across the six regex alternatives. + name: "real triple underscores outside inline code still flag when asterisk variant is in code", + input: "real ___strong___ and literal `***shape***` in code", + wantHint: true, + }, + { + // Longer close fence closes properly; real ***emphasis*** after + // the fence must fire. + name: "real emphasis after a fence closed by longer marker still flags", + input: "```\nliteral ***phrase*** in code\n````\n\nand then real ***phrase*** after", + wantHint: true, + }, + { + // 4-space indented "```" is an indented code block, not a fence + // open. The fence helper should refuse it; emphasis outside the + // (non-existent) fence must still be detected. + name: "four-space indented fence-like line does not open a fence for the emphasis check", + input: "prose\n\n ```\n not a fence\n ```\n\nreal ***strong*** here", + wantHint: true, + }, + { + // 3-space indented fence is valid per CommonMark. Emphasis inside + // must be sanitized away, so the check must not fire. + name: "three-space indented fence still hides triple-asterisk inside", + input: " ```\n literal ***text*** inside\n ```", + wantHint: false, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { From 8bec6eb85ce459ca9dc19c718edefabad020d191 Mon Sep 17 00:00:00 2001 From: fangshuyu-768 Date: Tue, 21 Apr 2026 12:02:18 +0800 Subject: [PATCH 3/3] fix(doc): address CodeRabbit review on docs +update warnings (#581) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two CodeRabbit nits from #569: 1. Unit test hint assertion only checked for `delete_range` in the remediation message; the companion `insert_before` half of the guidance could regress undetected. Broaden the assertion to require both tokens so a future edit that drops half the remediation produces an immediate test failure. 2. No E2E coverage proved the dry-run contract in the PR description ("Not emitted in dry-run mode — kept quiet during planning"). The helper itself is unit-tested, but nothing caught a regression where a later refactor wired docsUpdateWarnings into the DryRun path. Add tests/cli_e2e/docs/docs_update_dryrun_test.go: TestDocs_UpdateDryRunSuppressesSemanticWarnings invokes `docs +update --dry-run --mode=replace_range --markdown "***x***\n\nb"` — an input crafted to trip BOTH pre-write warnings — and asserts neither the "warning:" prefix, the blank-line message, nor the combined-emphasis message appears on stdout or stderr. Note: the file needs -f to add because .gitignore has a bare `docs/` rule that accidentally matches tests/cli_e2e/docs/. The existing tracked files under that directory predate the rule; new additions have to be force-added until the ignore pattern is narrowed. Not worth rewriting .gitignore for one file. Verified manually that the new E2E fails cleanly when warnings are injected into DryRun and passes again after reverting — the test has real regression-detection power, not just a sticker. Co-authored-by: fangshuyu-768 --- shortcuts/doc/docs_update_check_test.go | 2 +- tests/cli_e2e/docs/docs_update_dryrun_test.go | 70 +++++++++++++++++++ 2 files changed, 71 insertions(+), 1 deletion(-) create mode 100644 tests/cli_e2e/docs/docs_update_dryrun_test.go diff --git a/shortcuts/doc/docs_update_check_test.go b/shortcuts/doc/docs_update_check_test.go index 6208e0540..50905873a 100644 --- a/shortcuts/doc/docs_update_check_test.go +++ b/shortcuts/doc/docs_update_check_test.go @@ -147,7 +147,7 @@ func TestCheckDocsUpdateReplaceMultilineMarkdown(t *testing.T) { t.Fatalf("checkDocsUpdateReplaceMultilineMarkdown(%q, %q) = %q, wantHint=%v", tt.mode, tt.markdown, got, tt.wantHint) } - if tt.wantHint && !strings.Contains(got, "delete_range") { + if tt.wantHint && (!strings.Contains(got, "delete_range") || !strings.Contains(got, "insert_before")) { t.Errorf("hint should suggest delete_range/insert_before remediation, got: %s", got) } }) diff --git a/tests/cli_e2e/docs/docs_update_dryrun_test.go b/tests/cli_e2e/docs/docs_update_dryrun_test.go new file mode 100644 index 000000000..6e0f8cce3 --- /dev/null +++ b/tests/cli_e2e/docs/docs_update_dryrun_test.go @@ -0,0 +1,70 @@ +// Copyright (c) 2026 Lark Technologies Pte. Ltd. +// SPDX-License-Identifier: MIT + +package docs + +import ( + "context" + "strings" + "testing" + "time" + + clie2e "github.com/larksuite/cli/tests/cli_e2e" + "github.com/stretchr/testify/require" +) + +// TestDocs_UpdateDryRunSuppressesSemanticWarnings asserts the contract that +// docsUpdateWarnings is NOT invoked on the --dry-run path. The unit tests in +// shortcuts/doc/docs_update_check_test.go prove the helper emits warnings for +// replace_range + blank-line and for combined-emphasis markers; this E2E +// locks in that they never reach the user during dry-run planning, so a +// future refactor that moves warning emission into a shared code path can't +// silently regress. +// +// Input is intentionally crafted to trigger BOTH warnings the helper emits: +// - mode=replace_range + markdown containing "\n\n" (blank-line warning) +// - markdown containing `***combined***` (combined bold+italic warning) +// +// Neither string may appear in dry-run output. +func TestDocs_UpdateDryRunSuppressesSemanticWarnings(t *testing.T) { + // Fake creds are enough — dry-run short-circuits before any real API call. + t.Setenv("LARKSUITE_CLI_APP_ID", "app") + t.Setenv("LARKSUITE_CLI_APP_SECRET", "secret") + t.Setenv("LARKSUITE_CLI_BRAND", "feishu") + + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + t.Cleanup(cancel) + + // "***combined***" is a triple-asterisk combined-emphasis shape; "\n\n" + // is a paragraph break. Both would normally produce warnings when + // Execute runs under --mode=replace_range; both must be absent here. + markdown := "***combined***\n\nsecond paragraph" + + result, err := clie2e.RunCmd(ctx, clie2e.Request{ + Args: []string{ + "docs", "+update", + "--doc", "doxcnDryRunE2E", + "--mode", "replace_range", + "--selection-with-ellipsis", "placeholder", + "--markdown", markdown, + "--dry-run", + }, + DefaultAs: "bot", + }) + require.NoError(t, err) + result.AssertExitCode(t, 0) + + // Neither warning prefix ("warning:") nor either specific warning body + // may appear in dry-run output (stdout OR stderr). + combined := result.Stdout + "\n" + result.Stderr + for _, needle := range []string{ + "warning:", + "does not split a block into multiple paragraphs", + "combined bold+italic markers", + } { + if strings.Contains(combined, needle) { + t.Errorf("dry-run output must not surface pre-write warning %q\nstdout:\n%s\nstderr:\n%s", + needle, result.Stdout, result.Stderr) + } + } +}