Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 14 additions & 3 deletions shortcuts/doc/docs_update.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import (
"context"
"fmt"
"strings"

"github.com/larksuite/cli/shortcuts/common"
Expand Down Expand Up @@ -89,12 +90,22 @@
Set("mcp_tool", "update-doc").Set("args", args)
},
Execute: func(ctx context.Context, runtime *common.RuntimeContext) error {
mode := runtime.Str("mode")
markdown := runtime.Str("markdown")

Check warning on line 94 in shortcuts/doc/docs_update.go

View check run for this annotation

Codecov / codecov/patch

shortcuts/doc/docs_update.go#L93-L94

Added lines #L93 - L94 were not covered by tests

// Static semantic checks run before the MCP call so users see
// warnings even if the subsequent request fails. They never block
// execution — the update still proceeds.
for _, w := range docsUpdateWarnings(mode, markdown) {
fmt.Fprintf(runtime.IO().ErrOut, "warning: %s\n", w)

Check warning on line 100 in shortcuts/doc/docs_update.go

View check run for this annotation

Codecov / codecov/patch

shortcuts/doc/docs_update.go#L99-L100

Added lines #L99 - L100 were not covered by tests
}

args := map[string]interface{}{
"doc_id": runtime.Str("doc"),
"mode": runtime.Str("mode"),
"mode": mode,

Check warning on line 105 in shortcuts/doc/docs_update.go

View check run for this annotation

Codecov / codecov/patch

shortcuts/doc/docs_update.go#L105

Added line #L105 was not covered by tests
}
if v := runtime.Str("markdown"); v != "" {
args["markdown"] = v
if markdown != "" {
args["markdown"] = markdown

Check warning on line 108 in shortcuts/doc/docs_update.go

View check run for this annotation

Codecov / codecov/patch

shortcuts/doc/docs_update.go#L107-L108

Added lines #L107 - L108 were not covered by tests
}
if v := runtime.Str("selection-with-ellipsis"); v != "" {
args["selection_with_ellipsis"] = v
Expand Down
281 changes: 281 additions & 0 deletions shortcuts/doc/docs_update_check.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,281 @@
// Copyright (c) 2026 Lark Technologies Pte. Ltd.
// SPDX-License-Identifier: MIT

package doc

import (
"regexp"
"strings"
)

// docsUpdateWarnings runs the static checks for a `docs +update` call and
// collects the non-empty warning messages they produce, in a fixed order.
// It returns nil when no check fires. The function only reports; it never
// decides whether the update should proceed — that is left to the caller.
//
// Checks, in the order their warnings are appended:
//
//  1. checkDocsUpdateReplaceMultilineMarkdown(mode, markdown): a replace_*
//     mode combined with a Markdown payload that contains a blank-line
//     paragraph break in prose.
//
//  2. checkDocsUpdateBoldItalic(markdown): a combined bold+italic marker in
//     one of six shapes:
//     ***text***   ___text___
//     **_text_**   __*text*__
//     _**text**_   *__text__*
//
// The individual checks are responsible for ignoring fenced code blocks,
// inline code spans, and backslash-escaped emphasis markers so that literal
// Markdown samples do not produce false positives.
func docsUpdateWarnings(mode, markdown string) []string {
	// Evaluate every check up front; an empty string means "nothing to report".
	results := [...]string{
		checkDocsUpdateReplaceMultilineMarkdown(mode, markdown),
		checkDocsUpdateBoldItalic(markdown),
	}

	var warnings []string
	for _, msg := range results {
		if msg == "" {
			continue
		}
		warnings = append(warnings, msg)
	}
	return warnings
}

// checkDocsUpdateReplaceMultilineMarkdown returns a warning when mode is
// "replace_range" or "replace_all" and markdown contains a blank-line
// paragraph break outside fenced code blocks (as judged by
// proseHasBlankLine). For any other mode, or when no such blank line is
// found, it returns "".
//
// CRLF line endings are folded to LF before detection so "\r\n\r\n" is
// treated the same as "\n\n".
func checkDocsUpdateReplaceMultilineMarkdown(mode, markdown string) string {
	switch mode {
	case "replace_range", "replace_all":
		// Only these modes are subject to the check.
	default:
		return ""
	}

	if unixEOL := strings.ReplaceAll(markdown, "\r\n", "\n"); proseHasBlankLine(unixEOL) {
		return "--mode=" + mode + " does not split a block into multiple paragraphs; " +
			"the blank line in --markdown will render as literal text. " +
			"For multiple paragraphs, use --mode=delete_range followed by --mode=insert_before."
	}
	return ""
}

// combinedEmphasisPatterns lists the six combined bold+italic marker shapes
// the bold/italic check looks for. Each entry carries a short label for the
// shape and the regex that detects it.
//
// Every regex has two alternatives. The first requires a payload of at least
// two characters that starts and ends with a non-space character; the second
// covers a payload that is exactly one non-space character (e.g. `***X***`),
// which the first alternative cannot match because it needs two \S atoms.
var combinedEmphasisPatterns = []struct {
	shape string
	re    *regexp.Regexp
}{
	// Triple delimiter: bold+italic written with one delimiter character.
	{
		shape: "***text***",
		re:    regexp.MustCompile(`\*\*\*\S[^*]*?\S\*\*\*|\*\*\*\S\*\*\*`),
	},
	{
		shape: "___text___",
		re:    regexp.MustCompile(`___\S[^_]*?\S___|___\S___`),
	},

	// Bold on the outside, italic on the inside.
	{
		shape: "**_text_**",
		re:    regexp.MustCompile(`\*\*_\S[^_*]*?\S_\*\*|\*\*_\S_\*\*`),
	},
	{
		shape: "__*text*__",
		re:    regexp.MustCompile(`__\*\S[^_*]*?\S\*__|__\*\S\*__`),
	},

	// Italic on the outside, bold on the inside.
	{
		shape: "_**text**_",
		re:    regexp.MustCompile(`_\*\*\S[^_*]*?\S\*\*_|_\*\*\S\*\*_`),
	},
	{
		shape: "*__text__*",
		re:    regexp.MustCompile(`\*__\S[^_*]*?\S__\*|\*__\S__\*`),
	},
}

// checkDocsUpdateBoldItalic flags Markdown emphases that attempt to
// combine bold and italic in a way Lark cannot represent. Fenced code
// blocks, inline code spans, and backslash-escaped emphasis markers are
// stripped first so that literal markdown examples ("here is a
// `***keyword***` to flag") do not trigger the warning.
func checkDocsUpdateBoldItalic(markdown string) string {
if markdown == "" {
return ""
}
sanitized := stripEscapedEmphasisMarkers(stripMarkdownCodeRegions(markdown))
for _, p := range combinedEmphasisPatterns {
if p.re.MatchString(sanitized) {
return "Lark does not support combined bold+italic markers " +
"(e.g. ***text***, ___text___, **_text_**, _**text**_, __*text*__, *__text__*); " +
"the emphasis will be downgraded to either bold or italic. " +
"Split into two separate emphases or drop one of them."
}
}
return ""
}

// proseHasBlankLine reports whether markdown contains a paragraph-separating
// blank line outside of fenced code blocks. Blank lines inside ```...``` or
// ~~~...~~~ fences are code content, not paragraph separators, and are
// ignored.
//
// A blank (empty or whitespace-only) line counts only when it sits between
// two non-blank boundaries: some non-blank line (prose, or a fence
// opener/closer) must appear before it and another non-blank line must
// appear after it. Leading blank lines ("\n\ntext") and trailing blank lines
// ("text\n\n") therefore do not count, because they do not separate two
// pieces of content.
//
// The input is expected to use "\n" line endings; callers normalize CRLF.
func proseHasBlankLine(markdown string) bool {
	inFence := false
	fenceMarker := ""
	seenContent := false  // a non-blank boundary has already appeared
	pendingBlank := false // a blank line after content is waiting for a following boundary

	for _, line := range strings.Split(markdown, "\n") {
		if inFence {
			// Everything up to the closing fence is code, blank or not.
			if isCodeFenceClose(line, fenceMarker) {
				inFence = false
				fenceMarker = ""
			}
			continue
		}

		if strings.TrimSpace(line) == "" {
			// A blank line only matters once there is content above it.
			if seenContent {
				pendingBlank = true
			}
			continue
		}

		// Non-blank line outside a fence: it closes any pending blank run.
		if pendingBlank {
			return true
		}
		seenContent = true

		if marker := codeFenceOpenMarker(line); marker != "" {
			inFence = true
			fenceMarker = marker
		}
	}
	return false
}

// stripMarkdownCodeRegions returns a copy of markdown in which code is
// removed from view of later regex checks:
//
//   - every line belonging to a fenced code block (opener, content, closer)
//     is replaced by an empty line;
//   - on all other lines, inline code spans are overwritten by
//     maskInlineCodeSpans.
//
// The number of lines is preserved because each input line maps to exactly
// one output line. A fence that is never closed blanks everything through
// the end of the input.
func stripMarkdownCodeRegions(markdown string) string {
	source := strings.Split(markdown, "\n")
	cleaned := make([]string, len(source)) // zero value "" is the blanked form

	// openFence holds the marker of the fence currently open, or "" when
	// the scanner is in prose.
	openFence := ""
	for idx, text := range source {
		if openFence != "" {
			if isCodeFenceClose(text, openFence) {
				openFence = ""
			}
			continue
		}
		if opener := codeFenceOpenMarker(text); opener != "" {
			openFence = opener
			continue
		}
		cleaned[idx] = maskInlineCodeSpans(text)
	}
	return strings.Join(cleaned, "\n")
}

// maskInlineCodeSpans returns line with every inline code span overwritten
// by the same number of space characters, so the result has the same byte
// length as the input. When no spans are found the input is returned
// unchanged.
//
// Span detection is delegated to scanInlineCodeSpans (defined elsewhere in
// this package; the original note places it in markdown_fix.go and describes
// it as implementing the CommonMark §6.1 matching-backtick-run rule, so
// `` `a`b` `` is a single span). Each span is used here as a
// [start, end) byte range into line.
func maskInlineCodeSpans(line string) string {
	spans := scanInlineCodeSpans(line)
	if len(spans) == 0 {
		return line
	}

	var masked strings.Builder
	cursor := 0
	for _, span := range spans {
		start, end := span[0], span[1]
		masked.WriteString(line[cursor:start])             // prose before the span
		masked.WriteString(strings.Repeat(" ", end-start)) // the span itself, blanked
		cursor = end
	}
	masked.WriteString(line[cursor:]) // prose after the last span
	return masked.String()
}

// stripEscapedEmphasisMarkers removes backslash-escaped '*' and '_' (the
// backslash and the marker it escapes) so the bold/italic regexes don't
// treat literal sequences like `\***text***` as real combined emphasis.
// CommonMark renders "\*" as a literal "*" with no emphasis semantics, so
// dropping the pair from the detection input keeps the heuristic aligned
// with what a renderer does.
//
// The input is scanned once, left to right, so that an escaped backslash
// ("\\") is consumed as a unit and kept verbatim. This matters for input
// such as `\\***text***`: the first backslash escapes the second, and the
// "***" that follows is genuine emphasis that must remain visible to the
// check. Any other backslash (before a non-marker character, or at the end
// of the string) is kept as-is.
func stripEscapedEmphasisMarkers(s string) string {
	// Fast path: nothing can be escaped without a backslash.
	if !strings.Contains(s, `\`) {
		return s
	}

	var sb strings.Builder
	sb.Grow(len(s))
	for i := 0; i < len(s); i++ {
		// Byte-wise scanning is safe: '\\', '*' and '_' are ASCII and never
		// occur inside a multi-byte UTF-8 sequence.
		if s[i] != '\\' || i+1 == len(s) {
			sb.WriteByte(s[i])
			continue
		}
		switch s[i+1] {
		case '*', '_':
			// Escaped emphasis marker: drop the backslash and the marker.
			i++
		case '\\':
			// Escaped backslash: keep both bytes and skip past the pair so
			// the second backslash cannot act as an escape opener.
			sb.WriteString(`\\`)
			i++
		default:
			sb.WriteByte('\\')
		}
	}
	return sb.String()
}

// codeFenceOpenMarker returns the fence marker (e.g. "```" or "~~~~") if
// line opens a fenced code block, otherwise "". It applies these
// CommonMark §4.5 rules:
//
//   - Indentation: up to 3 leading spaces are tolerated; 4+ leading spaces
//     or a tab in the leading whitespace disqualify the line (checked by
//     fenceIndentOK).
//   - Marker: a run of at least three '`' or at least three '~' at the start
//     of the de-indented line. The whole run is returned so the closer can
//     be required to be at least as long.
//   - Info string: for backtick fences the text after the marker must not
//     contain a backtick. A line such as "```code```" is an inline code
//     span, not a fence opener; treating it as an opener would swallow the
//     rest of the document as code. Tilde fences have no such restriction.
func codeFenceOpenMarker(line string) string {
	body, ok := fenceIndentOK(line)
	if !ok {
		return ""
	}
	if run := leadingRun(body, '`'); len(run) >= 3 {
		if strings.Contains(body[len(run):], "`") {
			return ""
		}
		return run
	}
	if run := leadingRun(body, '~'); len(run) >= 3 {
		return run
	}
	return ""
}

// isCodeFenceClose reports whether line closes a fence opened with marker.
// Per CommonMark §4.5 the closer must use the same fence character, be at
// least as long as the opener, sit within 0..3 leading spaces, and carry
// no info-string text.
func isCodeFenceClose(line, marker string) bool {
if marker == "" {
return false

Check warning on line 236 in shortcuts/doc/docs_update_check.go

View check run for this annotation

Codecov / codecov/patch

shortcuts/doc/docs_update_check.go#L236

Added line #L236 was not covered by tests
}
body, ok := fenceIndentOK(line)
if !ok {
return false

Check warning on line 240 in shortcuts/doc/docs_update_check.go

View check run for this annotation

Codecov / codecov/patch

shortcuts/doc/docs_update_check.go#L240

Added line #L240 was not covered by tests
}
fenceChar := marker[0]
run := leadingRun(body, fenceChar)
if len(run) < len(marker) {
return false
}
return strings.TrimSpace(body[len(run):]) == ""
}

// fenceIndentOK checks whether line's indentation is permissible for a
// CommonMark fence and, if so, returns the line with its leading spaces
// removed.
//
// Results:
//   - (line without leading spaces, true) when the first non-space character
//     is preceded by 0..3 spaces and is not a tab;
//   - ("", true) when the line is empty or consists of 1..3 spaces only;
//   - ("", false) when the line starts with 4 or more spaces, or a tab
//     appears in the leading whitespace — such a line is indented code block
//     content rather than a fence boundary.
func fenceIndentOK(line string) (string, bool) {
	const maxFenceIndent = 3
	for i := 0; i < len(line); i++ {
		switch line[i] {
		case ' ':
			// A space at index 3 is the fourth leading space: too indented.
			if i >= maxFenceIndent {
				return "", false
			}
		case '\t':
			return "", false
		default:
			return line[i:], true
		}
	}
	// Fewer than four characters, all spaces (or empty): valid, no content.
	return "", true
}

// leadingRun returns the longest prefix of s that consists solely of the
// byte c. The result is "" when s is empty or does not start with c, and s
// itself when every byte of s equals c.
func leadingRun(s string, c byte) string {
	for end := 0; end < len(s); end++ {
		if s[end] != c {
			return s[:end]
		}
	}
	return s
}
Loading
Loading