From 02769f7d31f7b52ea279c361fc02809c707103f4 Mon Sep 17 00:00:00 2001
From: yaozhen00 <yaozhen.00@bytedance.com>
Date: Thu, 9 Apr 2026 00:57:51 +0800
Subject: [PATCH 1/2] feat(test): optimize cli-e2e-testcase-writer skill add
 coverage.md

---
 .gitignore                                    |   1 +
 .../cli_e2e/cli-e2e-testcase-writer/SKILL.md  | 254 ++++++------------
 tests/cli_e2e/demo/coverage.md                |  42 +++
 tests/cli_e2e/task/coverage.md                |  50 ++++
 4 files changed, 172 insertions(+), 175 deletions(-)
 create mode 100644 tests/cli_e2e/demo/coverage.md
 create mode 100644 tests/cli_e2e/task/coverage.md

diff --git a/.gitignore b/.gitignore
index 9df212e40..534dd75fe 100644
--- a/.gitignore
+++ b/.gitignore
@@ -34,3 +34,4 @@ tests/mail/reports/
 # Generated / test artifacts
 internal/registry/meta_data.json
 cmd/api/download.bin
+app.log
diff --git a/tests/cli_e2e/cli-e2e-testcase-writer/SKILL.md b/tests/cli_e2e/cli-e2e-testcase-writer/SKILL.md
index 7b0e3335f..c6bef62b7 100644
--- a/tests/cli_e2e/cli-e2e-testcase-writer/SKILL.md
+++ b/tests/cli_e2e/cli-e2e-testcase-writer/SKILL.md
@@ -1,6 +1,6 @@
 ---
 name: cli-e2e-testcase-writer
-description: Write scenario-based end-to-end Go testcases for the compiled `lark-cli` binary under `tests/cli_e2e`. Use when adding or updating a CLI testcase that should autonomously explore help and schema output, build a self-contained lifecycle with `clie2e.RunCmd`, organize steps with `t.Run`, clean up with `t.Cleanup`, and assert JSON output with `testify/assert` and `gjson`.
+description: Use when adding or updating Go CLI E2E coverage for one `tests/cli_e2e/{domain}` domain of the compiled `lark-cli`, especially when the work requires live `--help` or `schema` exploration, scenario-based `clie2e.RunCmd` workflows, and per-domain `coverage.md` maintenance.
 metadata:
   requires:
     bins: ["lark-cli"]
@@ -8,211 +8,115 @@ metadata:
 
 # CLI E2E Testcase Writer
 
-Write testcase code, not framework code. `tests/cli_e2e/core.go` already provides the harness, and `tests/cli_e2e/demo/task_lifecycle_test.go` is the reference example only. Unless the user explicitly asks for framework work, add or update testcase files only.
+Work on one domain per run. Produce exactly two artifacts for that domain:
+- workflow testcase files under `tests/cli_e2e/{domain}/`
+- `tests/cli_e2e/{domain}/coverage.md`
 
-## What a good testcase looks like
+Focus on domain testcase files. Do not change shared E2E support code such as `tests/cli_e2e/core.go` unless the user explicitly asks. Treat `tests/cli_e2e/demo/` as reference only.
 
-A good cli e2e testcase here is:
-- scenario-based, not a loose smoke test
-- self-contained and data-consistent
-  create the resource you later read, update, search, or delete
-- broad enough to prove the workflow
-  usually create plus one or more follow-up reads or mutations plus teardown
-- scoped to one feature or one workflow
-  do not turn one testcase into the entire domain
-- written with normal Go testing primitives
+## Core standard
 
-This is different from traditional API test suites where usage docs live elsewhere. Here, the command contract is discoverable from `lark-cli --help`, domain help, subcommand help, and schema output, and the agent is expected to explore and verify it autonomously.
+- Make the testcase scenario-based and self-contained.
+- Prove one workflow end to end: create the resource, run one or more follow-up reads or mutations against it, then tear it down.
+- Prefer one file per workflow or one closely related feature.
+- For mutable flows, prove persisted state with read-after-write assertions, not just exit code.
+- Leave prerequisite-heavy paths uncovered when they cannot be proven, and explain why in `coverage.md`.
 
-## File organization
+## Workflow
 
-Put real domain testcases under:
-
-```text
-tests/cli_e2e/{domain}/
-```
-
-Examples:
-- `tests/cli_e2e/task/task_status_workflow_test.go`
-- `tests/cli_e2e/task/task_comment_workflow_test.go`
-
-Treat `tests/cli_e2e/demo/` as reference material, not as the place to accumulate real coverage.
-
-## How to split cases
-
-Split by feature or workflow, not by API surface inventory.
-
-Good splits:
-- one file for task status flow: `create -> complete -> get -> reopen -> get`
-- one file for task comment flow
-- one file for task reminder flow
-- one file for tasklist association flow
-
-Bad split:
-- one giant `task_test.go` that creates a task, updates it, comments it, reminds it, assigns it, adds followers, attaches tasklists, and queries everything in one lifecycle
-
-Prefer:
-- one top-level test per workflow
-- one file per workflow or per closely related feature
-- small shared helpers in the same domain test package when setup/cleanup logic truly repeats
-
-## Explore before writing
-
-Do not guess command names, flags, or payload fields from memory. Discover them:
+### 1. Explore the live CLI before writing code
 
 ```bash
 lark-cli --help
 lark-cli <domain> --help
 lark-cli <domain> +<shortcut> -h
-lark-cli <domain> <resource> <method> -h
-lark-cli schema <domain>.<resource>.<method>
+lark-cli <domain> <group> --help
+lark-cli <domain> <group> <method> -h
+lark-cli schema <domain>.<group>.<method>
 ```
 
-Use this exploration loop repeatedly while writing the testcase:
-1. find the right domain and command path
-2. decide whether the scenario should use a shortcut or a resource method
-3. inspect the exact `--params` and `--data` shape
-4. run the draft testcase
-5. inspect failures, then go back to help or schema and refine
-
-Also inspect environmental constraints before finalizing coverage:
-- whether the current test environment supports `bot`, `user`, or both
-- whether the scenario needs external identities, preexisting groups, documents, chats, or other remote fixtures
-- whether the command path is actually executable in CI-like conditions
-
-## Use the harness directly
-
-Call `clie2e.RunCmd` with `clie2e.Request`.
-
-```go
-result, err := clie2e.RunCmd(ctx, clie2e.Request{
-	Args: []string{"task", "tasks", "get"},
-	Params: map[string]any{
-		"task_guid": taskGUID,
-	},
-})
-require.NoError(t, err)
-result.AssertExitCode(t, 0)
-result.AssertStdoutStatus(t, 0)
-```
+### 2. Count leaf commands for the denominator
 
-Use `Request` like this:
-- `Args`: command path and plain flags
-- `Params`: JSON for `--params`
-- `Data`: JSON for `--data`
-- `BinaryPath`, `DefaultAs`, `Format`: only when the testcase must override defaults
-
-## Default testcase shape
-
-Use one top-level test per workflow. Break the workflow into substeps with `t.Run`.
-
-```go
-func TestDomain_Scenario(t *testing.T) {
-	parentT := t
-	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
-	t.Cleanup(cancel)
-
-	suffix := time.Now().UTC().Format("20060102-150405")
-	var resourceID string
-
-	t.Run("create", func(t *testing.T) {
-		result, err := clie2e.RunCmd(ctx, clie2e.Request{...})
-		require.NoError(t, err)
-		result.AssertExitCode(t, 0)
-		result.AssertStdoutStatus(t, true)
-		resourceID = gjson.Get(result.Stdout, "data.guid").String()
-		require.NotEmpty(t, resourceID)
-
-		parentT.Cleanup(func() {
-			// best-effort delete
-		})
-	})
-
-	t.Run("get", func(t *testing.T) {
-		require.NotEmpty(t, resourceID)
-	})
-}
-```
+- A leaf command is one that executes an action — it has no further subcommands.
+- If `lark-cli <domain> <group> --help` lists no subcommands, `<group>` itself is the leaf.
+- Count `task +create` as one leaf and `task tasks get` as one leaf.
+- Do not count parameter combinations.
+- Reuse coverage already present under `tests/cli_e2e/{domain}/`. Do not count `tests/cli_e2e/demo/`.
 
-Use this shape because:
-- `t.Run` makes reports readable
-- `parentT.Cleanup` keeps created resources alive for later substeps
-- one testcase owns one full resource lifecycle
+### 3. Choose the proof surface before editing
 
-## Data self-consistency
+Identify the provable risks for the touched workflow: invalid input, missing prerequisite, identity or permission, state transition, output shape, cleanup safety. If only the happy path is testable, document the blocked risk areas in `coverage.md`.
 
-Prefer workflows whose data can be created and cleaned up entirely within the testcase.
+### 4. Add or update the workflow testcase
 
-Good:
-- create a task, then get/update/comment/delete that same task
-- create a tasklist, then add a task created by the testcase
+- Use `clie2e.RunCmd(ctx, clie2e.Request{...})`.
+- Put command path and plain flags in `Args`; put JSON in `Params` (URL/path parameters) and `Data` (request body).
+- Prefer one top-level test per workflow with `t.Run` substeps.
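+- Register teardown on `parentT.Cleanup` (the top-level test's `t`), not on the subtest's own `t.Cleanup`, so the resource outlives the creating subtest and is still deleted when a later subtest fails.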
+- When touching an existing command, first confirm its current JSON response shape (status type, field paths, and identifiers consumed by later steps), and only then change assertions.
 
-Be explicit when the data is not self-consistent:
-- if a testcase needs a real user open_id, preexisting chat, existing document, or tenant-specific fixture, do not invent one
-- call out the missing prerequisite to the user
-- if you still want to leave a reference testcase in code, write it with `t.Skip()` and a short reason
+### 5. Run and iterate
 
-Example:
+Run `go test ./tests/cli_e2e/{domain} -count=1` while iterating and before finishing. If command shape or behavior is unclear, re-check help or schema (step 1) before changing assertions.
 
-```go
-func TestTask_AssignWorkflow_UserOnly(t *testing.T) {
-	t.Skip("requires a real user open_id and user-capable test environment")
-}
-```
+### 6. Refresh the domain outputs
 
-Do not silently hardcode made-up IDs, fake URLs, or guessed remote resources just to make the testcase look complete.
+- Update the workflow testcase files.
+- Update `coverage.md`: recompute the denominator from live help output, mark each command as `shortcut` or `api`, and keep one command table for the whole domain.
 
-## Environment constraints
+## Testcase rules
 
-Assume the current local/CI-like environment may support only `bot` identity by default.
+- Override `BinaryPath`, `DefaultAs`, or `Format` on `clie2e.Request` only when the testcase truly needs it.
+- Use `require.NoError`, `result.AssertExitCode`, `result.AssertStdoutStatus`, `assert`, and `gjson`.
+- For shortcut responses (`{"ok": true, ...}`), call `result.AssertStdoutStatus(t, true)`; for API responses (`{"code": 0, ...}`), call `result.AssertStdoutStatus(t, 0)`.
+- Use `t.Helper()` only for setup or assertion helpers that are called from multiple tests.
+- Use table-driven tests only when the scenario shape repeats across inputs.
+- For expected failures, assert stderr content and exit code when the environment makes them deterministic.
+- If identity or external fixtures cannot be proven, leave the command uncovered and document the prerequisite rather than faking confidence.
 
-Implications:
-- do not assume `--as user` works
-- commands or workflows that require user identity may be unsupported in the current environment
-- confirm this by checking help, running the command, or using known repo guidance before writing the final testcase set
+## coverage.md
 
-When `--as user` is unavailable:
-- still implement bot-compatible workflows normally
-- for user-only workflows, either stop and tell the user what prerequisite is missing, or leave a skipped testcase with `t.Skip()`
+Keep `coverage.md` brief and mechanical. Include:
+- a domain-specific H1 title
+- a metrics section with denominator, covered count, and coverage rate
+- a summary section restating each `Test...` workflow, key `t.Run(...)` proof points, and main blockers
+- one command table for all commands
 
-Typical risky areas:
-- `+get-my-tasks`
-- commands that require current-user profile or self identity lookup
-- workflows that need a real user open_id for assign/follower/member mutations
+Recommended structure:
 
-## Go testing rules
+```markdown
+# <Domain> CLI E2E Coverage
 
-- Use `t.Run` for lifecycle steps such as `create`, `update`, `get`, `list`, `delete`.
-- Use `t.Cleanup` for teardown and shared cleanup.
-- Use `t.Helper()` in local helpers when the same setup or assertion logic really repeats.
-- Use table-driven tests only when the same scenario shape repeats across multiple inputs. Do not force table-driven style onto a single live workflow.
-- Use `require.NoError` for command execution and prerequisites.
-- Use `assert` for returned field values after the command has succeeded.
-- Use `gjson.Get(result.Stdout, "...")` for JSON field extraction.
+## Metrics
+- Denominator: N leaf commands
+- Covered: N
+- Coverage: N%
 
-## Output conventions
+## Summary
+- TestXxx: ... key `t.Run(...)` proof points ...
+- Blocked area: ...
 
-- shortcut-style commands often return `{"ok": true, ...}` and should use `result.AssertStdoutStatus(t, true)`
-- service-style commands often return `{"code": 0, "data": ...}` and should use `result.AssertStdoutStatus(t, 0)`
+## Command Table
+| Status | Cmd | Type | Testcase | Key parameter shapes | Notes / uncovered reason |
+| --- | --- | --- | --- | --- | --- |
+| ✓ | task +create | shortcut | task_status_workflow_test.go::TestTask_StatusWorkflow | basic create; create with due | |
+| ✕ | task +assign | shortcut |  | none | requires real user open_id |
+```
 
-Then assert the business fields with `gjson`.
+- Mark each command `shortcut` or `api`.
+- Write testcase entries as `<file>_test.go::TestName/subtest name` so the test path can be passed to `go test -run`.
+- Commands only exercised in `parentT.Cleanup` teardown are not counted as covered.
+- Do not split covered and uncovered commands into separate sections.
 
-## Common mistakes
+## Guardrails
 
-- Do not modify `tests/cli_e2e/core.go` just because one testcase wants a convenience wrapper.
-- Do not write a testcase that depends on preexisting remote data.
-- Do not put agent, model, or vendor brand names into task summaries, comments, tasklist names, fixture IDs, or other visible remote test data; use neutral prefixes such as `lark-cli-e2e-` or `<domain>-e2e-`.
-- Do not attach cleanup to the create subtest if later subtests still need the resource.
+- Run as bot identity only; do not assume `--as user` works.
 - Do not place new real coverage under `tests/cli_e2e/demo/`.
-- Do not dump all domain behaviors into one file or one testcase.
-- Do not hardcode obvious defaults unless the command really needs explicit flags.
-- Do not guess `Params` or `Data` fields when schema output can tell you the exact shape.
-- Do not fabricate prerequisite data when the scenario needs real external fixtures.
-- Do not force a user-only workflow to run in a bot-only environment; use `t.Skip()` with a concrete reason.
-- Do not stop after the first draft. Run, inspect, explore again, and improve the testcase.
-
-## Validation
-
-- Run `go test ./tests/cli_e2e/... -count=1`.
-- Rerun the touched package directly when the testcase is live and slow.
-- If behavior is unclear, go back to help and schema before changing the testcase.
+- Do not depend on preexisting remote data.
+- Do not fabricate open_ids, chats, docs, or other remote fixtures.
+- Prefer deterministic negative cases over tenant-dependent assertions.
+- Do not guess `Params` or `Data` fields when help or schema can tell you the exact shape.
+- Do not hardcode obvious defaults unless the command truly requires explicit flags.
+- Do not put agent, model, or vendor brand names in visible remote test data; use neutral prefixes such as `lark-cli-e2e-` or `<domain>-e2e-`.
+- A command is covered only when the testcase asserts returned fields or persisted state, not just exit code.
+- Cleanup-only execution is not coverage: count `delete` only when the workflow runs it as an asserted step, not when it runs solely in `parentT.Cleanup`.
diff --git a/tests/cli_e2e/demo/coverage.md b/tests/cli_e2e/demo/coverage.md
new file mode 100644
index 000000000..d8cc126b3
--- /dev/null
+++ b/tests/cli_e2e/demo/coverage.md
@@ -0,0 +1,42 @@
+# Demo Coverage Template
+
+> This file is a demo template only.
+> It shows the expected `coverage.md` shape for real domains under `tests/cli_e2e/{domain}`.
+> The numbers, command list, and coverage status below are illustrative, not authoritative.
+> `tests/cli_e2e/demo/` is reference material and is not part of formal CLI E2E coverage accounting.
+> There is no `lark-cli demo` command, so this file cannot be recomputed from live domain help output.
+
+## Metrics
+
+- Denominator: 8 leaf commands
+- Covered: 3
+- Coverage: 37.5%
+
+## Summary
+
+- Purpose: show humans and AI agents how to maintain a per-domain coverage file even when the directory is documentation-only and not backed by a real `lark-cli demo` command tree.
+- TestDemo_TaskLifecycle: demonstrates one minimal task lifecycle workflow for documentation purposes.
+- TestDemo_TaskLifecycle/create: runs `task +create` with `summary` and `description`, captures the returned `taskGUID`, and registers parent cleanup for later teardown.
+- TestDemo_TaskLifecycle/update: runs `task +update --task-id <guid>` and mutates both `summary` and `description` on the created task.
+- TestDemo_TaskLifecycle/get: runs `task tasks get` for the same task and asserts the persisted `guid`, updated `summary`, and updated `description`.
+- Cleanup note: `task tasks delete` is executed in `parentT.Cleanup`, but this template intentionally keeps cleanup-only execution marked uncovered so workflow assertions remain distinct from teardown mechanics.
+- Demo-only gap note: `task +complete`, `task +reopen`, `task +assign`, and `task +get-my-tasks` are intentionally left as uncovered examples for a minimal template.
+
+## Command Table
+
+| Status | Cmd | Type | Testcase | Key parameter shapes | Notes / uncovered reason |
+| --- | --- | --- | --- | --- | --- |
+| ✓ | task +create | shortcut | task_lifecycle_test.go::TestDemo_TaskLifecycle/create | basic create; summary; description | demo example |
+| ✓ | task +update | shortcut | task_lifecycle_test.go::TestDemo_TaskLifecycle/update | --task-id; update summary; update description | demo example |
+| ✓ | task tasks get | api | task_lifecycle_test.go::TestDemo_TaskLifecycle/get | task_guid in --params | demo example |
+| ✕ | task tasks delete | api |  | none | cleanup exists in parentT.Cleanup, but demo coverage intentionally treats cleanup-only execution as uncovered |
+| ✕ | task +complete | shortcut |  | none | not shown in this minimal lifecycle example |
+| ✕ | task +reopen | shortcut |  | none | not shown in this minimal lifecycle example |
+| ✕ | task +assign | shortcut |  | none | example of a user-identity-sensitive command; requires real user fixtures |
+| ✕ | task +get-my-tasks | shortcut |  | none | example of a current-user-dependent command; often unavailable in bot-only environments |
+
+## Notes
+
+- In a real domain, recompute the denominator from live `lark-cli --help` exploration instead of copying this file.
+- Replace demo rows with real command inventory for that domain.
+- Keep skipped commands marked `✕`; reuse the `t.Skip(...)` reason as the uncovered reason.
diff --git a/tests/cli_e2e/task/coverage.md b/tests/cli_e2e/task/coverage.md
new file mode 100644
index 000000000..2016e36be
--- /dev/null
+++ b/tests/cli_e2e/task/coverage.md
@@ -0,0 +1,50 @@
+# Task CLI E2E Coverage
+
+## Metrics
+- Denominator: 29 leaf commands
+- Covered: 10
+- Coverage: 34.5%
+
+## Summary
+- TestTask_StatusWorkflow: creates a task via `task +create`, then proves `task +complete`, `task tasks get`, and `task +reopen` through `complete`, `get completed task`, `reopen`, and `get reopened task`; asserts `status` flips between `done` and `todo` and `completed_at` is set then cleared.
+- TestTask_ReminderWorkflow: creates a task with a due time via `task +create`, then proves `task +reminder` and `task tasks get` through `set reminder`, `get task with reminder`, `remove reminder`, and `get task without reminder`; asserts `relative_fire_minute=30`, reminder id presence, and reminder removal.
+- TestTask_CommentWorkflow: creates a task via `task +create`, runs `comment`, and asserts the returned comment id is non-empty; this is the direct proof for `task +comment`.
+- TestTask_TasklistWorkflow: runs `create tasklist with task`, then `get tasklist`, `list tasklist tasks`, and `get task`; proves `task +tasklist-create`, `task tasklists get`, `task tasklists tasks`, and `task tasks get` with seeded task payload and task-to-tasklist linkage.
+- TestTask_TasklistAddTaskWorkflow: creates a standalone tasklist and task, runs `add task to tasklist`, then `list tasklist tasks` and `get task with tasklist link`; proves `task +tasklist-task-add`, `task tasklists tasks`, and `task tasks get`, including no failed tasks in the add response.
+- Cleanup path note: workflow-created tasks and tasklists are deleted through direct `task tasks delete` / `task tasklists delete` cleanup paths in `helpers_test.go::createTask`, `helpers_test.go::createTasklist`, and `tasklist_workflow_test.go::TestTask_TasklistWorkflow`, but those cleanup-only executions are not counted as command coverage because no testcase asserts delete behavior as the primary proof surface.
+- Blocked area: assignee, follower, and tasklist member mutations still require stable real-user `open_id` fixtures; the current suite is bot-safe only.
+- Blocked area: `task +get-my-tasks` still depends on `--as user` identity plus deterministic user-scoped data.
+- Gap pattern: direct `tasks create/delete/list/patch`, `tasklists create/delete/list/patch`, `members *`, and `subtasks *` APIs still lack deterministic direct-call workflows; coverage of the corresponding shortcuts does not count toward those leaf commands.
+
+## Command Table
+| Status | Cmd | Type | Testcase | Key parameter shapes | Notes / uncovered reason |
+| --- | --- | --- | --- | --- | --- |
+| ✕ | task +assign | shortcut |  | none | requires real assignee open_id fixtures; shortcut defaults to `--as user` |
+| ✓ | task +comment | shortcut | task_comment_workflow_test.go::TestTask_CommentWorkflow/comment | `--task-id`; `--content` | |
+| ✓ | task +complete | shortcut | task_status_workflow_test.go::TestTask_StatusWorkflow/complete | `--task-id` | |
+| ✓ | task +create | shortcut | task_status_workflow_test.go::TestTask_StatusWorkflow; task_comment_workflow_test.go::TestTask_CommentWorkflow; task_reminder_workflow_test.go::TestTask_ReminderWorkflow; tasklist_add_task_workflow_test.go::TestTask_TasklistAddTaskWorkflow | `summary` + `description`; `due.timestamp` + `due.is_all_day` | |
+| ✕ | task +followers | shortcut |  | none | requires real follower open_id fixtures; shortcut defaults to `--as user` |
+| ✕ | task +get-my-tasks | shortcut |  | none | depends on `--as user` identity and deterministic user-scoped task data |
+| ✓ | task +reminder | shortcut | task_reminder_workflow_test.go::TestTask_ReminderWorkflow/set reminder; task_reminder_workflow_test.go::TestTask_ReminderWorkflow/remove reminder | `--task-id --set 30m`; `--task-id --remove` | |
+| ✓ | task +reopen | shortcut | task_status_workflow_test.go::TestTask_StatusWorkflow/reopen | `--task-id` | |
+| ✓ | task +tasklist-create | shortcut | tasklist_workflow_test.go::TestTask_TasklistWorkflow/create tasklist with task; tasklist_add_task_workflow_test.go::TestTask_TasklistAddTaskWorkflow | `--name` only; `--name` plus task array in `--data` | |
+| ✕ | task +tasklist-members | shortcut |  | none | requires real member open_id fixtures to add, remove, or set tasklist members |
+| ✓ | task +tasklist-task-add | shortcut | tasklist_add_task_workflow_test.go::TestTask_TasklistAddTaskWorkflow/add task to tasklist | `--tasklist-id`; `--task-id` | |
+| ✕ | task +update | shortcut |  | none | no dedicated workflow yet for summary, description, or due-field mutation assertions |
+| ✕ | task members add | api |  | none | requires stable member fixtures and explicit direct API-body assertions |
+| ✕ | task members remove | api |  | none | requires stable member fixtures and explicit direct API-body assertions |
+| ✕ | task subtasks create | api |  | none | needs a parent-task workflow plus direct subtask payload assertions |
+| ✕ | task subtasks list | api |  | none | needs deterministic subtask fixtures created in the same workflow |
+| ✕ | task tasklists add_members | api |  | none | requires real member open_id fixtures and direct API coverage |
+| ✕ | task tasklists create | api |  | none | only covered indirectly through `task +tasklist-create`; no direct API invocation yet |
+| ✕ | task tasklists delete | api |  | none | only exercised in parent cleanup; no testcase asserts delete behavior or post-delete state as the primary proof |
+| ✓ | task tasklists get | api | tasklist_workflow_test.go::TestTask_TasklistWorkflow/get tasklist | `tasklist_guid` in `--params` | |
+| ✕ | task tasklists list | api |  | none | needs isolated list or filter assertions against ambient tasklist data |
+| ✕ | task tasklists patch | api |  | none | no dedicated direct tasklist-update workflow yet |
+| ✕ | task tasklists remove_members | api |  | none | requires real member open_id fixtures and direct API coverage |
+| ✓ | task tasklists tasks | api | tasklist_workflow_test.go::TestTask_TasklistWorkflow/list tasklist tasks; tasklist_add_task_workflow_test.go::TestTask_TasklistAddTaskWorkflow/list tasklist tasks | `tasklist_guid`; `page_size` | |
+| ✕ | task tasks create | api |  | none | only covered indirectly through `task +create`; no direct API invocation yet |
+| ✕ | task tasks delete | api |  | none | only exercised in parent cleanup; no testcase asserts delete behavior or post-delete state as the primary proof |
+| ✓ | task tasks get | api | task_status_workflow_test.go::TestTask_StatusWorkflow/get completed task; task_status_workflow_test.go::TestTask_StatusWorkflow/get reopened task; task_reminder_workflow_test.go::TestTask_ReminderWorkflow/get task with reminder; task_reminder_workflow_test.go::TestTask_ReminderWorkflow/get task without reminder; tasklist_workflow_test.go::TestTask_TasklistWorkflow/get task; tasklist_add_task_workflow_test.go::TestTask_TasklistAddTaskWorkflow/get task with tasklist link | `task_guid` in `--params`; assert status, reminders, summary, description, and tasklist link | |
+| ✕ | task tasks list | api |  | none | needs isolated list or filter assertions against ambient task data |
+| ✕ | task tasks patch | api |  | none | no dedicated direct task-update workflow yet |

From 1c3535f78321c161ec2ae28e0da5515db772d77c Mon Sep 17 00:00:00 2001
From: yaozhen00 <yaozhen.00@bytedance.com>
Date: Mon, 13 Apr 2026 16:45:41 +0800
Subject: [PATCH 2/2] feat(test): test report show

---
 .github/workflows/cli-e2e.yml | 80 ++++++-----------------------------
 1 file changed, 14 insertions(+), 66 deletions(-)

diff --git a/.github/workflows/cli-e2e.yml b/.github/workflows/cli-e2e.yml
index f8dcfd826..c08620863 100644
--- a/.github/workflows/cli-e2e.yml
+++ b/.github/workflows/cli-e2e.yml
@@ -25,6 +25,8 @@ on:
 
 permissions:
   contents: read
+  actions: read
+  checks: write
 
 jobs:
   cli-e2e:
@@ -65,71 +67,17 @@ jobs:
             echo "No CLI E2E packages to test after exclusions."
             exit 1
           fi
-          go run gotest.tools/gotestsum@v1.12.3 --format testname --junitfile cli-e2e-report.xml -- -count=1 -v $packages
+          # gotestsum requires --packages when --rerun-fails is combined with go test args after --.
+          packages_arg=$(printf '%s\n' "$packages" | paste -sd' ' -)
+          go run gotest.tools/gotestsum@v1.12.3 --rerun-fails=2 --rerun-fails-max-failures=20 --packages="$packages_arg" --format testname --junitfile cli-e2e-report.xml -- -count=1 -v
 
-      - name: Summarize CLI E2E test report
+      - name: Publish CLI E2E test report
         if: ${{ !cancelled() }}
-        run: |
-          python3 - <<'PY'
-          import os
-          import xml.etree.ElementTree as ET
-
-          report_path = "cli-e2e-report.xml"
-          summary_path = os.environ["GITHUB_STEP_SUMMARY"]
-
-          root = ET.parse(report_path).getroot()
-          suites = [root] if root.tag == "testsuite" else root.findall("testsuite")
-
-          tests = failures = errors = skipped = 0
-          failed_cases = []
-          skipped_cases = []
-
-          for suite in suites:
-            tests += int(suite.attrib.get("tests", 0))
-            failures += int(suite.attrib.get("failures", 0))
-            errors += int(suite.attrib.get("errors", 0))
-            skipped += int(suite.attrib.get("skipped", 0))
-
-            for case in suite.findall("testcase"):
-              classname = case.attrib.get("classname", "")
-              name = case.attrib.get("name", "")
-              label = f"{classname}.{name}" if classname else name
-
-              failure = case.find("failure")
-              error = case.find("error")
-              skipped_node = case.find("skipped")
-
-              if failure is not None or error is not None:
-                message = ""
-                node = failure if failure is not None else error
-                if node is not None:
-                  message = node.attrib.get("message", "") or (node.text or "").strip()
-                failed_cases.append((label, message))
-              elif skipped_node is not None:
-                message = skipped_node.attrib.get("message", "") or (skipped_node.text or "").strip()
-                skipped_cases.append((label, message))
-
-          passed = tests - failures - errors - skipped
-
-          with open(summary_path, "a", encoding="utf-8") as f:
-            f.write("## CLI E2E Test Report\n\n")
-            f.write(f"- Total: {tests}\n")
-            f.write(f"- Passed: {passed}\n")
-            f.write(f"- Failed: {failures}\n")
-            f.write(f"- Errors: {errors}\n")
-            f.write(f"- Skipped: {skipped}\n\n")
-
-            if failed_cases:
-              f.write("### Failed Tests\n\n")
-              for label, message in failed_cases:
-                detail = f" - {message}" if message else ""
-                f.write(f"- `{label}`{detail}\n")
-              f.write("\n")
-
-            if skipped_cases:
-              f.write("### Skipped Tests\n\n")
-              for label, message in skipped_cases:
-                detail = f" - {message}" if message else ""
-                f.write(f"- `{label}`{detail}\n")
-              f.write("\n")
-          PY
+        uses: dorny/test-reporter@a43b3a5f7366b97d083190328d2c652e1a8b6aa2 # v3.0.0
+        with:
+          name: CLI E2E Tests
+          path: cli-e2e-report.xml
+          reporter: java-junit
+          use-actions-summary: true
+          list-suites: all
+          list-tests: all