From b56f2224e0ad00f6b47016ee71587e27cda73aa9 Mon Sep 17 00:00:00 2001 From: fangshuyu-768 Date: Mon, 20 Apr 2026 23:09:37 +0800 Subject: [PATCH] fix(doc): harden +media-insert selection UX on top of #335 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to #335 review: closes a handful of UX and robustness gaps in the new --selection-with-ellipsis flow. - Flag description rewritten to make the "insert at the top-level ancestor" semantics explicit — when the selection is inside a callout, table cell, or nested list, media lands outside that container, not inside. Also calls out the 'start...end' disambiguator. - locate-doc is now called with limit=2 so an ambiguous selection (same phrase in more than one block) surfaces a stderr warning pointing at 'start...end', instead of silently picking the first match. The first-match return behaviour is unchanged. - When the anchor is nested below the root, locateInsertIndex now logs a note to stderr naming the walk depth and the root-level ancestor's insert index. Users don't have to guess why the image landed outside the callout they were editing. - maxDepth bumped 8 → 32 with a comment explaining the invariants: `visited` is the real cycle guard, `maxDepth` is belt-and-suspenders. 32 comfortably exceeds real docx nesting depth so a deeply-nested but well-formed anchor is no longer silently rejected. - Comment added before the parent-walk loop noting why the API calls are serial (each level's parent_id is only known after the previous GET returns; can't be batched or parallelised). Tests: - TestLocateInsertIndexWarnsOnMultipleMatches: stubs two matches, asserts the stderr warning names the ambiguity and mentions 'start...end', and that the first-match insert index is unchanged. - TestLocateInsertIndexLogsNestedAnchor: anchor two levels below root, asserts stderr carries the "nested … top-level ancestor" note. - TestLocateInsertIndexCycleDetection: malformed parent chain with blk_x.parent = blk_y and blk_y.parent = blk_x, neither reachable from root. Registering a single GET /blocks/blk_y stub also bounds the call count — a regression that broke `visited` tracking would either hang or fail via httpmock's extra-call guard. --- shortcuts/doc/doc_media_insert.go | 44 ++++- shortcuts/doc/doc_media_insert_test.go | 253 +++++++++++++++++++++++++ 2 files changed, 292 insertions(+), 5 deletions(-) diff --git a/shortcuts/doc/doc_media_insert.go b/shortcuts/doc/doc_media_insert.go index 4c902f46e..a3c9dbd11 100644 --- a/shortcuts/doc/doc_media_insert.go +++ b/shortcuts/doc/doc_media_insert.go @@ -46,7 +46,7 @@ var DocMediaInsert = common.Shortcut{ {Name: "type", Default: "image", Desc: "type: image | file"}, {Name: "align", Desc: "alignment: left | center | right"}, {Name: "caption", Desc: "image caption text"}, - {Name: "selection-with-ellipsis", Desc: "plain text (or 'start...end') that identifies the target block; the media is inserted after that block by default"}, + {Name: "selection-with-ellipsis", Desc: "plain text (or 'start...end' to disambiguate) matching the target block's content. Media is inserted at the top-level ancestor of the matched block — i.e., when the selection is inside a callout, table cell, or nested list, media lands outside that container, not inside it. Pass 'start...end' (a unique prefix and suffix separated by '...') when the plain text appears in more than one block"}, {Name: "before", Type: "bool", Desc: "insert before the matched block instead of after (requires --selection-with-ellipsis)"}, {Name: "file-view", Desc: "file block rendering: card (default) | preview | inline; only applies when --type=file. preview renders audio/video as an inline player"}, }, @@ -440,10 +440,12 @@ func extractAppendTarget(rootData map[string]interface{}, fallbackBlockID string // parent_id chain (using single-block GET calls when needed) to resolve nested // blocks to their top-level ancestor in rootChildren. func locateInsertIndex(runtime *common.RuntimeContext, documentID string, selection string, rootChildren []interface{}, before bool) (int, error) { + // Ask for 2 matches so we can warn when the selection is ambiguous. locate-doc + // orders matches by document position, so matches[0] is still deterministic. args := map[string]interface{}{ "doc_id": documentID, "selection_with_ellipsis": selection, - "limit": 1, + "limit": 2, } result, err := common.CallMCPTool(runtime, "locate-doc", args) if err != nil { @@ -459,6 +461,15 @@ func locateInsertIndex(runtime *common.RuntimeContext, documentID string, select "check spelling or use 'start...end' syntax to narrow the selection", ) } + if len(matches) > 1 { + // Silently picking the first match surprises users whose selection appears + // in more than one block (e.g. the same phrase in a title and a paragraph). + // Surface that another match exists and point at the 'start...end' disambiguator. + fmt.Fprintf(runtime.IO().ErrOut, + "warning: selection (%s) matched more than one block; inserting relative to the first. "+ + "Pass --selection-with-ellipsis 'start...end' to narrow.\n", + redactSelection(selection)) + } matchMap, _ := matches[0].(map[string]interface{}) anchorBlockID := common.GetString(matchMap, "anchor_block_id") @@ -484,12 +495,21 @@ func locateInsertIndex(runtime *common.RuntimeContext, documentID string, select } } - // Walk up the parent chain. locate-doc already gives us one level of parent, - // so most cases need zero extra API calls. + // Walk up the parent chain to the top-level ancestor in rootChildren. This + // is serial by nature: each level's parent_id is only known after the + // previous level's GET /blocks/{id} response arrives, so the calls cannot + // be batched or parallelised. + // + // visited is the real cycle guard — it stops an A→B→A parent-id loop (seen + // on malformed API responses) after one lap. maxDepth is belt-and-suspenders + // in case both visited tracking and parent_id sanity simultaneously break; + // 32 comfortably exceeds the deepest real docx nesting (~6–8 levels for + // quote/callout/list combinations) without letting a bug run unbounded. cur := anchorBlockID nextParent := parentBlockID visited := map[string]bool{} - const maxDepth = 8 + const maxDepth = 32 + walkDepth := 0 for depth := 0; depth < maxDepth; depth++ { if visited[cur] { break @@ -497,6 +517,19 @@ func locateInsertIndex(runtime *common.RuntimeContext, documentID string, select visited[cur] = true if idx, ok := rootSet[cur]; ok { + if walkDepth > 0 { + // The anchor was nested inside a callout / table cell / list and + // got resolved to its top-level ancestor. Surface this so users + // don't misread "insert before 'X'" as "insert right next to X" + // when X is buried several levels deep. + posLabel := "after" + if before { + posLabel = "before" + } + fmt.Fprintf(runtime.IO().ErrOut, + "note: selection (%s) was nested %d level(s) deep; inserting %s its top-level ancestor at index %d\n", + redactSelection(selection), walkDepth, posLabel, idx) + } if before { return idx, nil } @@ -522,6 +555,7 @@ func locateInsertIndex(runtime *common.RuntimeContext, documentID string, select break } cur = parent + walkDepth++ } return 0, output.ErrWithHint( diff --git a/shortcuts/doc/doc_media_insert_test.go b/shortcuts/doc/doc_media_insert_test.go index c97c29d22..e5e9cd2fb 100644 --- a/shortcuts/doc/doc_media_insert_test.go +++ b/shortcuts/doc/doc_media_insert_test.go @@ -731,3 +731,256 @@ func TestDocMediaInsertValidateFileView(t *testing.T) { }) } } + +// TestLocateInsertIndexWarnsOnMultipleMatches verifies that when locate-doc +// returns more than one match, a warning is written to stderr pointing the user +// at the 'start...end' disambiguation syntax. Silently picking the first match +// of an ambiguous selection is a real UX trap — users who edit documents with +// repeated phrases (a heading that also appears in the TOC, for example) get +// no signal that another match existed. +func TestLocateInsertIndexWarnsOnMultipleMatches(t *testing.T) { + f, _, stderr, reg := cmdutil.TestFactory(t, docsTestConfigWithAppID("locate-multi-app")) + + docID := "doxcnMULTI" + reg.Register(&httpmock.Stub{ + Method: "GET", + URL: "/open-apis/docx/v1/documents/" + docID + "/blocks/" + docID, + Body: map[string]interface{}{ + "code": 0, "msg": "ok", + "data": map[string]interface{}{ + "block": map[string]interface{}{ + "block_id": docID, + "children": []interface{}{"blk_a", "blk_b"}, + }, + }, + }, + }) + // Two matches — same selection appears in two different root-level blocks. + // locate-doc orders matches by document position, so matches[0] is still + // deterministic (blk_a) even with limit=2. + reg.Register(&httpmock.Stub{ + Method: "POST", + URL: "mcp.feishu.cn/mcp", + Body: buildLocateDocMCPResponse([]map[string]interface{}{ + {"anchor_block_id": "blk_a", "parent_block_id": docID}, + {"anchor_block_id": "blk_b", "parent_block_id": docID}, + }), + }) + createStub := &httpmock.Stub{ + Method: "POST", + URL: "/open-apis/docx/v1/documents/" + docID + "/blocks/" + docID + "/children", + Body: map[string]interface{}{ + "code": 0, "msg": "ok", + "data": map[string]interface{}{ + "children": []interface{}{ + map[string]interface{}{"block_id": "blk_new", "block_type": 27, "image": map[string]interface{}{}}, + }, + }, + }, + } + reg.Register(createStub) + reg.Register(&httpmock.Stub{ + Method: "POST", + URL: "/open-apis/drive/v1/medias/upload_all", + Body: map[string]interface{}{ + "code": 0, "msg": "ok", + "data": map[string]interface{}{"file_token": "ftok_multi"}, + }, + }) + reg.Register(&httpmock.Stub{ + Method: "PATCH", + URL: "/open-apis/docx/v1/documents/" + docID + "/blocks/batch_update", + Body: map[string]interface{}{"code": 0, "msg": "ok", "data": map[string]interface{}{}}, + }) + + tmpDir := t.TempDir() + withDocsWorkingDir(t, tmpDir) + writeSizedDocTestFile(t, "img.png", 100) + + err := mountAndRunDocs(t, DocMediaInsert, []string{ + "+media-insert", + "--doc", docID, + "--file", "img.png", + "--selection-with-ellipsis", "Repeated phrase", + "--as", "bot", + }, f, nil) + if err != nil { + t.Fatalf("Execute() error: %v", err) + } + + // Warning should name the ambiguity and point at 'start...end'. + stderrOut := stderr.String() + if !strings.Contains(stderrOut, "matched more than one block") { + t.Errorf("stderr missing multi-match warning; got:\n%s", stderrOut) + } + if !strings.Contains(stderrOut, "start...end") { + t.Errorf("stderr missing 'start...end' disambiguation hint; got:\n%s", stderrOut) + } + // Should still insert at the first match (blk_a at index 0) → after ⇒ 1. + assertCreateBlockIndex(t, createStub, 1) +} + +// TestLocateInsertIndexLogsNestedAnchor verifies that when the matched block is +// nested (not a direct root child), a note is written to stderr explaining that +// the media lands at the top-level ancestor. This protects users from being +// surprised when selecting text inside a callout or table cell and seeing the +// image appear outside that container. +func TestLocateInsertIndexLogsNestedAnchor(t *testing.T) { + f, _, stderr, reg := cmdutil.TestFactory(t, docsTestConfigWithAppID("locate-nested-log-app")) + + docID := "doxcnNESTEDLOG" + // Same shape as TestLocateInsertIndexNestedBlockViaExecute: anchor is two + // levels below root, so walkDepth == 2 when we hit the root ancestor. + reg.Register(&httpmock.Stub{ + Method: "GET", + URL: "/open-apis/docx/v1/documents/" + docID + "/blocks/" + docID, + Body: map[string]interface{}{ + "code": 0, "msg": "ok", + "data": map[string]interface{}{ + "block": map[string]interface{}{ + "block_id": docID, + "children": []interface{}{"blk_section", "blk_other"}, + }, + }, + }, + }) + reg.Register(&httpmock.Stub{ + Method: "POST", + URL: "mcp.feishu.cn/mcp", + Body: buildLocateDocMCPResponse([]map[string]interface{}{ + {"anchor_block_id": "blk_grandchild", "parent_block_id": "blk_section_child"}, + }), + }) + reg.Register(&httpmock.Stub{ + Method: "GET", + URL: "/open-apis/docx/v1/documents/" + docID + "/blocks/blk_section_child", + Body: map[string]interface{}{ + "code": 0, "msg": "ok", + "data": map[string]interface{}{ + "block": map[string]interface{}{ + "block_id": "blk_section_child", + "parent_id": "blk_section", + }, + }, + }, + }) + reg.Register(&httpmock.Stub{ + Method: "POST", + URL: "/open-apis/docx/v1/documents/" + docID + "/blocks/" + docID + "/children", + Body: map[string]interface{}{ + "code": 0, "msg": "ok", + "data": map[string]interface{}{ + "children": []interface{}{ + map[string]interface{}{"block_id": "blk_new", "block_type": 27, "image": map[string]interface{}{}}, + }, + }, + }, + }) + reg.Register(&httpmock.Stub{ + Method: "POST", + URL: "/open-apis/drive/v1/medias/upload_all", + Body: map[string]interface{}{ + "code": 0, "msg": "ok", + "data": map[string]interface{}{"file_token": "ftok_nested_log"}, + }, + }) + reg.Register(&httpmock.Stub{ + Method: "PATCH", + URL: "/open-apis/docx/v1/documents/" + docID + "/blocks/batch_update", + Body: map[string]interface{}{"code": 0, "msg": "ok", "data": map[string]interface{}{}}, + }) + + tmpDir := t.TempDir() + withDocsWorkingDir(t, tmpDir) + writeSizedDocTestFile(t, "img.png", 100) + + err := mountAndRunDocs(t, DocMediaInsert, []string{ + "+media-insert", + "--doc", docID, + "--file", "img.png", + "--selection-with-ellipsis", "nested content", + "--as", "bot", + }, f, nil) + if err != nil { + t.Fatalf("Execute() error: %v", err) + } + + stderrOut := stderr.String() + if !strings.Contains(stderrOut, "nested") || !strings.Contains(stderrOut, "top-level ancestor") { + t.Errorf("stderr missing nested-anchor note; got:\n%s", stderrOut) + } +} + +// TestLocateInsertIndexCycleDetection verifies that a malformed parent chain +// (blk_x.parent = blk_y and blk_y.parent = blk_x, neither reachable from root) +// does not spin the locate-doc walk forever. The `visited` map must break the +// cycle, and the user must see the "not reachable from document root" error +// rather than the process hanging. Without this test, a regression that broke +// cycle protection would only surface in production with a stalled CLI. +func TestLocateInsertIndexCycleDetection(t *testing.T) { + f, _, _, reg := cmdutil.TestFactory(t, docsTestConfigWithAppID("locate-cycle-app")) + + docID := "doxcnCYCLE" + // Root has unrelated children — neither blk_x nor blk_y reach root. + reg.Register(&httpmock.Stub{ + Method: "GET", + URL: "/open-apis/docx/v1/documents/" + docID + "/blocks/" + docID, + Body: map[string]interface{}{ + "code": 0, "msg": "ok", + "data": map[string]interface{}{ + "block": map[string]interface{}{ + "block_id": docID, + "children": []interface{}{"blk_unrelated_a", "blk_unrelated_b"}, + }, + }, + }, + }) + // locate-doc hints parent_block_id = blk_y for anchor blk_x (first hop consumed). + reg.Register(&httpmock.Stub{ + Method: "POST", + URL: "mcp.feishu.cn/mcp", + Body: buildLocateDocMCPResponse([]map[string]interface{}{ + {"anchor_block_id": "blk_x", "parent_block_id": "blk_y"}, + }), + }) + // blk_y claims blk_x as parent — closes the cycle. The walk must land here + // exactly once before visited[blk_x] triggers a break. + blkYStub := &httpmock.Stub{ + Method: "GET", + URL: "/open-apis/docx/v1/documents/" + docID + "/blocks/blk_y", + Body: map[string]interface{}{ + "code": 0, "msg": "ok", + "data": map[string]interface{}{ + "block": map[string]interface{}{ + "block_id": "blk_y", + "parent_id": "blk_x", + }, + }, + }, + } + reg.Register(blkYStub) + + tmpDir := t.TempDir() + withDocsWorkingDir(t, tmpDir) + writeSizedDocTestFile(t, "img.png", 100) + + err := mountAndRunDocs(t, DocMediaInsert, []string{ + "+media-insert", + "--doc", docID, + "--file", "img.png", + "--selection-with-ellipsis", "cyclic anchor", + "--as", "bot", + }, f, nil) + if err == nil { + t.Fatal("expected 'block_not_reachable' error from cyclic parent chain; got nil") + } + if !strings.Contains(err.Error(), "not reachable") && !strings.Contains(err.Error(), "block_not_reachable") { + t.Fatalf("unexpected error — want cycle-bounded 'not reachable', got: %v", err) + } + // blk_y should be fetched exactly once. Registering just one stub for it + // already enforces an upper bound (httpmock errors on extra calls), so if + // the walk looped more than once the test harness would fail differently. + if blkYStub.CapturedHeaders == nil && blkYStub.CapturedBody == nil { + t.Errorf("expected the walk to fetch blk_y once; stub was not hit") + } +}