From 76c9839bc94b2da37cdbac6e763a9aabb84a7467 Mon Sep 17 00:00:00 2001
From: zbigniew sobiecki <zbigniew@sobiecki.name>
Date: Sun, 18 Jan 2026 10:59:08 +0100
Subject: [PATCH] feat(gadgets): split TodoUpsert into focused gadgets

Extract status updates from TodoUpsert into a dedicated TodoUpdateStatus
gadget to reduce LLM confusion and improve task tracking clarity.

Changes:
- Add TodoUpdateStatus gadget for status-only updates (pending/in_progress/done)
- Simplify TodoUpsert to handle content creation/updates only
- Update prompts to guide agents on using the separate gadgets
- Add git status and PR status to implementation trailing messages
- Add EditFile/WriteFile result handling guidance to prompts
- Add comprehensive tests for both gadgets

The separation makes each gadget's purpose clearer:
- TodoUpsert: Plan work by creating todos
- TodoUpdateStatus: Track progress by updating status

Co-Authored-By: Claude <noreply@anthropic.com>
---
 src/agents/base.ts                            |   3 +-
 .../prompts/templates/implementation.eta      |  15 +-
 .../templates/partials/rules-efficiency.eta   |  42 +++
 .../prompts/templates/respond-to-review.eta   |  15 +-
 src/agents/review.ts                          |   3 +-
 src/agents/utils/agentLoop.ts                 |   6 +-
 src/config/hintConfig.ts                      |  52 +++-
 src/gadgets/todo/TodoUpdateStatus.ts          |  46 +++
 src/gadgets/todo/TodoUpsert.ts                |  86 ++----
 src/gadgets/todo/index.ts                     |   1 +
 tests/unit/gadgets/todo.test.ts               | 284 ++++++++++++++++++
 11 files changed, 473 insertions(+), 80 deletions(-)
 create mode 100644 src/gadgets/todo/TodoUpdateStatus.ts
 create mode 100644 tests/unit/gadgets/todo.test.ts

diff --git a/src/agents/base.ts b/src/agents/base.ts
index dc8bad31..0d344c5a 100644
--- a/src/agents/base.ts
+++ b/src/agents/base.ts
@@ -15,7 +15,7 @@ import { ReadFile } from '../gadgets/ReadFile.js';
 import { Sleep } from '../gadgets/Sleep.js';
 import { CreatePR } from '../gadgets/github/index.js';
 import { Tmux } from '../gadgets/tmux.js';
-import { TodoDelete, TodoUpsert } from '../gadgets/todo/index.js';
+import { TodoDelete, TodoUpdateStatus, TodoUpsert } from '../gadgets/todo/index.js';
 import {
 	AddChecklistToCard,
 	CreateTrelloCard,
@@ -290,6 +290,7 @@ function createAgentBuilderWithGadgets(
 		new Sleep(),
 		// Task tracking gadgets
 		new TodoUpsert(),
+		new TodoUpdateStatus(),
 		new TodoDelete(),
 		// GitHub gadgets (no PR creation for planning)
 		...(isReadOnlyAgent ? [] : [new CreatePR()]),
diff --git a/src/agents/prompts/templates/implementation.eta b/src/agents/prompts/templates/implementation.eta
index 0454a767..31683354 100644
--- a/src/agents/prompts/templates/implementation.eta
+++ b/src/agents/prompts/templates/implementation.eta
@@ -21,15 +21,12 @@ You are an expert software engineer implementing features and fixing issues base
 
 ## Task Tracking
 
-Use TodoUpsert to create a todo list after reading the Trello card. This helps you:
-- Plan your work upfront
-- Track progress through implementation
-- Stay focused and avoid getting lost in exploration
-
-**Workflow:**
-1. After reading the card, create todos for each major step (e.g., "Create feature branch", "Implement X", "Write tests", "Run lint/type checks", "Create PR")
-2. Mark each todo as `in_progress` when you start it
-3. Mark each todo as `done` when you complete it
+Use the todo gadgets to plan and track your progress:
+
+1. **Plan**: After reading the Trello card, use `TodoUpsert` to create todos for major steps
+2. **Start**: Use `TodoUpdateStatus` to mark a todo as `in_progress` when you begin working on it
+3. **Complete**: Use `TodoUpdateStatus` to mark it as `done` when finished
+4. **One at a time**: Only have ONE todo in_progress at any time
 
 ## Understanding the Codebase
 
diff --git a/src/agents/prompts/templates/partials/rules-efficiency.eta b/src/agents/prompts/templates/partials/rules-efficiency.eta
index 1c2ab5e7..290c834e 100644
--- a/src/agents/prompts/templates/partials/rules-efficiency.eta
+++ b/src/agents/prompts/templates/partials/rules-efficiency.eta
@@ -54,3 +54,45 @@ in a single batch. Avoid the 'fix-run-repeat' loop for known breaking changes.
 When validating changes, always prioritize auto-fixing commands (e.g., format, lint --fix) before
 running read-only checks. If a generated artifact (like a migration or lockfile) looks incorrect,
 investigate the environment state rather than manually patching the output.
+
+### Acting on EditFile/WriteFile Results (CRITICAL)
+
+EditFile and WriteFile return structured output. You MUST read and act on it.
+
+**Status Codes:**
+- `status=success` - Edit worked, but CHECK THE DIAGNOSTICS SECTION
+- `status=failed` - Search content not found, USE THE SUGGESTIONS
+- `status=error` - Operation failed (permissions, path), read error message
+
+**On status=failed (search not found):**
+
+The output includes a SUGGESTIONS section with similar content found in the file:
+```
+SUGGESTIONS (similar content found):
+Line 42 (85% similar):
+```{what actually exists}```
+```
+
+1. READ this section - it shows what the file actually contains
+2. Adjust your search pattern to match the actual content
+3. Common issues: whitespace differences, indentation, content changed by a previous edit
+4. NEVER retry the exact same search - that's a loop
+
+**On status=success (check diagnostics):**
+
+Success responses include TypeScript and Biome diagnostics:
+```
+=== TypeScript Check ===
+{any type errors from your edit}
+
+=== Biome Lint ===
+{any lint issues from your edit}
+```
+
+If diagnostics show issues, FIX THEM IMMEDIATELY before making more edits.
+Don't proceed to other files until the current file is clean.
+
+**Recovery Escalation:**
+1. First failure: Use SUGGESTIONS to adjust search pattern
+2. Second failure: Read entire file with ReadFile, understand actual structure
+3. Third failure: Use WriteFile to replace the entire file content
\ No newline at end of file
diff --git a/src/agents/prompts/templates/respond-to-review.eta b/src/agents/prompts/templates/respond-to-review.eta
index a881e7b7..5af6df61 100644
--- a/src/agents/prompts/templates/respond-to-review.eta
+++ b/src/agents/prompts/templates/respond-to-review.eta
@@ -13,15 +13,12 @@ You are an expert software engineer addressing code review feedback on a pull re
 
 ## Task Tracking
 
-Use TodoUpsert to create a todo list after reading the review comments. This helps you:
-- Track each review comment that needs addressing
-- Avoid missing any feedback
-- Stay focused and organized
-
-**Workflow:**
-1. After reading comments, create a todo for each comment/issue to address
-2. Mark each todo as `in_progress` when you start it
-3. Mark each todo as `done` when you complete it AND reply to the comment
+Use the todo gadgets to track review comments:
+
+1. **Plan**: After reading comments, use `TodoUpsert` to create a todo for each comment to address
+2. **Start**: Use `TodoUpdateStatus` to mark a todo as `in_progress` when you begin working on it
+3. **Complete**: After you have finished AND replied to the comment, use `TodoUpdateStatus` to mark it as `done`
+4. **One at a time**: Only have ONE todo in_progress at any time
 
 ## Understanding the Codebase
 
diff --git a/src/agents/review.ts b/src/agents/review.ts
index 6f7ea810..74def9ad 100644
--- a/src/agents/review.ts
+++ b/src/agents/review.ts
@@ -20,7 +20,7 @@ import {
 	formatCheckStatus,
 } from '../gadgets/github/index.js';
 import { Tmux } from '../gadgets/tmux.js';
-import { TodoDelete, TodoUpsert } from '../gadgets/todo/index.js';
+import { TodoDelete, TodoUpdateStatus, TodoUpsert } from '../gadgets/todo/index.js';
 import { githubClient } from '../github/client.js';
 import type { AgentInput, AgentResult, CascadeConfig, ProjectConfig } from '../types/index.js';
 import { cleanupLogDirectory, cleanupLogFile, createFileLogger } from '../utils/fileLogger.js';
@@ -253,6 +253,7 @@ function createReviewAgentBuilder(
 		new Sleep(),
 		// Task tracking gadgets
 		new TodoUpsert(),
+		new TodoUpdateStatus(),
 		new TodoDelete(),
 		// GitHub gadgets (read + create review)
 		new GetPRDetails(),
diff --git a/src/agents/utils/agentLoop.ts b/src/agents/utils/agentLoop.ts
index 9b897a8c..a3b6916d 100644
--- a/src/agents/utils/agentLoop.ts
+++ b/src/agents/utils/agentLoop.ts
@@ -90,9 +90,13 @@ function addGadgetSpecificLogContext(
 
 	if (gadgetName === 'TodoUpsert') {
 		if (parameters.id) logContext.id = parameters.id;
-		if (parameters.status) logContext.status = parameters.status;
 		if (parameters.content) logContext.todo = truncateContent(String(parameters.content), 80);
 	}
+
+	if (gadgetName === 'TodoUpdateStatus') {
+		logContext.id = parameters.id;
+		logContext.status = parameters.status;
+	}
 }
 
 // ============================================================================
diff --git a/src/config/hintConfig.ts b/src/config/hintConfig.ts
index e26aa7a6..4204059c 100644
--- a/src/config/hintConfig.ts
+++ b/src/config/hintConfig.ts
@@ -1,3 +1,4 @@
+import { execSync } from 'node:child_process';
 import type { TrailingMessage } from 'llmist';
 import { formatTodoList, loadTodos } from '../gadgets/todo/storage.js';
 
@@ -33,6 +34,31 @@ function getAgentHint(agentType?: string): string {
 	return AGENT_HINTS.default;
 }
 
+/**
+ * Run a shell command synchronously (5000 ms timeout); return trimmed stdout, or null on any failure.
+ */
+function runCommand(command: string): string | null {
+	try {
+		return execSync(command, { encoding: 'utf-8', timeout: 5000 }).trim();
+	} catch {
+		return null;
+	}
+}
+
+/**
+ * Get `git status --short` output: '' when the command prints nothing, null when it fails.
+ */
+function getGitStatus(): string | null {
+	return runCommand('git status --short');
+}
+
+/**
+ * Get `gh pr view` output for the current branch's PR; null on any failure (stderr is discarded).
+ */
+function getPRView(): string | null {
+	return runCommand('gh pr view 2>/dev/null');
+}
+
 /**
  * Format the iteration status line with appropriate urgency indicator.
  */
@@ -76,13 +102,33 @@ export function getIterationTrailingMessage(agentType?: string): TrailingMessage
 	return (ctx) => {
 		const iterationStatus = formatIterationStatus(ctx.iteration, ctx.maxIterations, batchHint);
 
-		// For implementation agent, include the current todo list
+		// For implementation agent, include progress info, git status, and PR status
 		if (agentType === 'implementation') {
+			const sections: string[] = [iterationStatus];
+
+			// Add todo list if there are todos
 			const todos = loadTodos();
 			if (todos.length > 0) {
-				const todoListFormatted = formatTodoList(todos);
-				return `${iterationStatus}\n\n## Current Progress\n\n${todoListFormatted}`;
+				sections.push(`## Current Progress\n\n${formatTodoList(todos)}`);
 			}
+
+			// Add git status
+			const gitStatus = getGitStatus();
+			if (gitStatus) {
+				sections.push(`## Git Status\n\n\`\`\`\n${gitStatus}\n\`\`\``);
+			} else {
+				sections.push('## Git Status\n\nNo uncommitted changes.');
+			}
+
+			// Add PR status if a PR exists
+			const prView = getPRView();
+			if (prView) {
+				sections.push(`## PR Status\n\n\`\`\`\n${prView}\n\`\`\``);
+			} else {
+				sections.push('## PR Status\n\nNo PR exists for current branch.');
+			}
+
+			return sections.join('\n\n');
 		}
 
 		return iterationStatus;
diff --git a/src/gadgets/todo/TodoUpdateStatus.ts b/src/gadgets/todo/TodoUpdateStatus.ts
new file mode 100644
index 00000000..db407b8e
--- /dev/null
+++ b/src/gadgets/todo/TodoUpdateStatus.ts
@@ -0,0 +1,46 @@
+/**
+ * TodoUpdateStatus gadget - set the status of one existing todo, looked up by id.
+ * Throws if the id is unknown; returns the refreshed list. `comment` is not read by execute().
+ */
+import { Gadget, z } from 'llmist';
+import { type TodoStatus, formatTodoList, loadTodos, saveTodos } from './storage.js';
+
+export class TodoUpdateStatus extends Gadget({
+	name: 'TodoUpdateStatus',
+	description: `Update the status of an existing todo item.
+Use this to track progress: mark todos as in_progress when starting, done when complete.`,
+	schema: z.object({
+		id: z.string().describe('ID of the todo to update (required)'),
+		status: z.enum(['pending', 'in_progress', 'done']).describe('New status'),
+		comment: z.string().optional().describe('Brief explanation of why this change is needed'),
+	}),
+	examples: [
+		{
+			params: { id: '1', status: 'in_progress', comment: 'Starting work on first task' },
+			output:
+				'✏️ Updated todo #1 → in_progress.\n\n📋 Todo List\n   Progress: 0/1 done, 1 in progress, 0 pending\n\n🔄 #1 [in_progress]: Read and understand requirements',
+			comment: 'Mark todo as in progress',
+		},
+		{
+			params: { id: '1', status: 'done', comment: 'Task completed successfully' },
+			output:
+				'✏️ Updated todo #1 → done.\n\n📋 Todo List\n   Progress: 1/1 done, 0 in progress, 0 pending\n\n✅ #1 [done]: Read and understand requirements',
+			comment: 'Mark todo as done',
+		},
+	],
+}) {
+	override execute(params: this['params']): string {
+		const { id, status } = params;
+		const todos = loadTodos();
+		const index = todos.findIndex((t) => t.id === id);
+
+		if (index === -1) {
+			throw new Error(`Todo #${id} not found. Use TodoUpsert to create todos first.`);
+		}
+
+		todos[index].status = status as TodoStatus;
+		todos[index].updatedAt = new Date().toISOString();
+		saveTodos(todos);
+		return `✏️ Updated todo #${id} → ${status}.\n\n${formatTodoList(todos)}`;
+	}
+}
diff --git a/src/gadgets/todo/TodoUpsert.ts b/src/gadgets/todo/TodoUpsert.ts
index 45a4b04b..d9b3fe19 100644
--- a/src/gadgets/todo/TodoUpsert.ts
+++ b/src/gadgets/todo/TodoUpsert.ts
@@ -1,21 +1,13 @@
 /**
- * TodoUpsert gadget - Create or update todo items.
- * Helps agents track their progress through implementation tasks.
+ * TodoUpsert gadget - Create or update todo item content.
+ * Helps agents plan and organize their implementation tasks.
  */
 import { Gadget, z } from 'llmist';
-import {
-	type Todo,
-	type TodoStatus,
-	formatTodoList,
-	getNextId,
-	loadTodos,
-	saveTodos,
-} from './storage.js';
+import { type Todo, formatTodoList, getNextId, loadTodos, saveTodos } from './storage.js';
 
 interface TodoItem {
 	id?: string;
 	content?: string;
-	status?: string;
 }
 
 interface BatchResult {
@@ -31,14 +23,13 @@ function upsertBatchItem(item: TodoItem, todos: Todo[], now: string): 'created'
 			todos.push({
 				id: item.id,
 				content: item.content,
-				status: (item.status as TodoStatus) ?? 'pending',
+				status: 'pending',
 				createdAt: now,
 				updatedAt: now,
 			});
 			return 'created';
 		}
 		if (item.content !== undefined) todos[index].content = item.content;
-		if (item.status !== undefined) todos[index].status = item.status as TodoStatus;
 		todos[index].updatedAt = now;
 		return 'updated';
 	}
@@ -47,7 +38,7 @@ function upsertBatchItem(item: TodoItem, todos: Todo[], now: string): 'created'
 	todos.push({
 		id: getNextId(todos),
 		content: item.content,
-		status: (item.status as TodoStatus) ?? 'pending',
+		status: 'pending',
 		createdAt: now,
 		updatedAt: now,
 	});
@@ -76,18 +67,6 @@ function formatBatchResult(result: BatchResult, todos: Todo[]): string {
 	return `${parts.join(', ')}.\n\n${formatTodoList(todos)}`;
 }
 
-function updateExistingTodo(
-	index: number,
-	todos: Todo[],
-	content: string | undefined,
-	status: string | undefined,
-	now: string,
-): void {
-	if (content !== undefined) todos[index].content = content;
-	if (status !== undefined) todos[index].status = status as TodoStatus;
-	todos[index].updatedAt = now;
-}
-
 const todoItemSchema = z.object({
 	id: z.string().optional().describe('ID of existing todo to update. Omit to create a new todo.'),
 	content: z
@@ -95,20 +74,17 @@ const todoItemSchema = z.object({
 		.min(1)
 		.optional()
 		.describe('The todo item description. Required when creating, optional when updating.'),
-	status: z
-		.enum(['pending', 'in_progress', 'done'])
-		.optional()
-		.describe("Todo status: pending, in_progress, or done. Defaults to 'pending' for new items."),
 });
 
 export class TodoUpsert extends Gadget({
 	name: 'TodoUpsert',
-	description: `Create or update one or more todo items.
+	description: `Create or update todo item content.
 
-Use this to plan your work at the start of a task and track progress as you go.
-- For a single item: use id/content/status directly
+Use this to plan your work at the start of a task.
+- For a single item: use id/content directly
 - For multiple items: use the 'items' array to batch create/update
 
+All new todos start with status 'pending'. Use TodoUpdateStatus to change status.
 Returns the full todo list after the operation.`,
 	schema: z.object({
 		id: z.string().optional().describe('ID of existing todo to update. Omit to create a new todo.'),
@@ -117,38 +93,24 @@ Returns the full todo list after the operation.`,
 			.min(1)
 			.optional()
 			.describe('The todo item description. Required when creating, optional when updating.'),
-		status: z
-			.enum(['pending', 'in_progress', 'done'])
-			.optional()
-			.describe("Todo status: pending, in_progress, or done. Defaults to 'pending' for new items."),
 		items: z
 			.array(todoItemSchema)
 			.optional()
 			.describe(
-				'Batch mode: array of todo items to create/update. Each item has id/content/status. Use this to create multiple todos at once.',
+				'Batch mode: array of todo items to create/update. Each item has id/content. Use this to create multiple todos at once.',
 			),
+		comment: z.string().optional().describe('Brief explanation of why this change is needed'),
 	}),
 	examples: [
 		{
 			params: {
 				content: 'Read and understand the Trello card requirements',
+				comment: 'Planning initial task',
 			},
 			output:
 				'➕ Created todo #1.\n\n📋 Todo List\n   Progress: 0/1 done, 0 in progress, 1 pending\n\n⬜ #1 [pending]: Read and understand the Trello card requirements',
 			comment: 'Create a new todo item',
 		},
-		{
-			params: { id: '1', status: 'in_progress' },
-			output:
-				'✏️ Updated todo #1.\n\n📋 Todo List\n   Progress: 0/1 done, 1 in progress, 0 pending\n\n🔄 #1 [in_progress]: Read and understand the Trello card requirements',
-			comment: 'Mark a todo as in progress',
-		},
-		{
-			params: { id: '1', status: 'done' },
-			output:
-				'✏️ Updated todo #1.\n\n📋 Todo List\n   Progress: 1/1 done, 0 in progress, 0 pending\n\n✅ #1 [done]: Read and understand the Trello card requirements',
-			comment: 'Mark a todo as done',
-		},
 		{
 			params: {
 				items: [
@@ -157,6 +119,7 @@ Returns the full todo list after the operation.`,
 					{ content: 'Write tests' },
 					{ content: 'Run lint and typecheck' },
 				],
+				comment: 'Planning implementation steps',
 			},
 			output:
 				'➕ Created 4 todos.\n\n📋 Todo List\n   Progress: 0/4 done, 0 in progress, 4 pending\n\n⬜ #1 [pending]: Create feature branch\n⬜ #2 [pending]: Implement feature\n⬜ #3 [pending]: Write tests\n⬜ #4 [pending]: Run lint and typecheck',
@@ -165,7 +128,17 @@ Returns the full todo list after the operation.`,
 	],
 }) {
 	override execute(params: this['params']): string {
-		const { id, content, status, items } = params;
+		const { id, content, items } = params;
+
+		// Reject mixed single-item/batch input up front (it would otherwise fail silently)
+		if (items && items.length > 0 && (id !== undefined || content !== undefined)) {
+			throw new Error(
+				"Cannot combine top-level id/content with 'items' array. " +
+					'Use either single-item mode (id/content) OR batch mode (items array), not both. ' +
+					'To update one todo and create others, make two separate calls.',
+			);
+		}
+
 		const todos = loadTodos();
 		const now = new Date().toISOString();
 
@@ -181,12 +154,12 @@ Returns the full todo list after the operation.`,
 			const index = todos.findIndex((t) => t.id === id);
 			if (index === -1) {
 				if (!content) {
-					return `❌ Error: 'content' is required when creating a new todo.`;
+					throw new Error("'content' is required when creating a new todo.");
 				}
 				todos.push({
 					id,
 					content,
-					status: (status as TodoStatus) ?? 'pending',
+					status: 'pending',
 					createdAt: now,
 					updatedAt: now,
 				});
@@ -194,21 +167,22 @@ Returns the full todo list after the operation.`,
 				return `➕ Created todo #${id}.\n\n${formatTodoList(todos)}`;
 			}
 
-			updateExistingTodo(index, todos, content, status, now);
+			if (content !== undefined) todos[index].content = content;
+			todos[index].updatedAt = now;
 			saveTodos(todos);
 			return `✏️ Updated todo #${id}.\n\n${formatTodoList(todos)}`;
 		}
 
 		// Create new todo - content is required
 		if (!content) {
-			return `❌ Error: 'content' is required when creating a new todo.`;
+			throw new Error("'content' is required when creating a new todo.");
 		}
 
 		const newId = getNextId(todos);
 		todos.push({
 			id: newId,
 			content,
-			status: (status as TodoStatus) ?? 'pending',
+			status: 'pending',
 			createdAt: now,
 			updatedAt: now,
 		});
diff --git a/src/gadgets/todo/index.ts b/src/gadgets/todo/index.ts
index 0cb5c330..55cc36a3 100644
--- a/src/gadgets/todo/index.ts
+++ b/src/gadgets/todo/index.ts
@@ -2,6 +2,7 @@
  * Todo gadgets for agent task tracking.
  * Helps agents plan work and track progress through implementation tasks.
  */
+export { TodoUpdateStatus } from './TodoUpdateStatus.js';
 export { TodoUpsert } from './TodoUpsert.js';
 export { TodoDelete } from './TodoDelete.js';
 export { initTodoSession } from './storage.js';
diff --git a/tests/unit/gadgets/todo.test.ts b/tests/unit/gadgets/todo.test.ts
new file mode 100644
index 00000000..6757bfe2
--- /dev/null
+++ b/tests/unit/gadgets/todo.test.ts
@@ -0,0 +1,284 @@
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+
+import { TodoUpdateStatus } from '../../../src/gadgets/todo/TodoUpdateStatus.js';
+import { TodoUpsert } from '../../../src/gadgets/todo/TodoUpsert.js';
+
+// Replace the storage module with an in-memory fake used by both gadgets under test
+vi.mock('../../../src/gadgets/todo/storage.js', () => {
+	let todos: Array<{
+		id: string;
+		content: string;
+		status: string;
+		createdAt: string;
+		updatedAt: string;
+	}> = [];
+
+	return {
+		loadTodos: vi.fn(() => todos),
+		saveTodos: vi.fn((newTodos) => {
+			todos = newTodos;
+		}),
+		getNextId: vi.fn((existingTodos) => {
+			const maxId = existingTodos.reduce(
+				(max: number, t: { id: string }) => Math.max(max, Number.parseInt(t.id) || 0),
+				0,
+			);
+			return String(maxId + 1);
+		}),
+		formatTodoList: vi.fn((existingTodos) => {
+			if (existingTodos.length === 0) return '📋 Todo list is empty.';
+			const lines = existingTodos.map(
+				(t: { id: string; status: string; content: string }) =>
+					`#${t.id} [${t.status}]: ${t.content}`,
+			);
+			return `📋 Todo List\n${lines.join('\n')}`;
+		}),
+		// Test-only helpers to reset or seed the in-memory todo list
+		_resetTodos: () => {
+			todos = [];
+		},
+		_setTodos: (
+			newTodos: Array<{
+				id: string;
+				content: string;
+				status: string;
+				createdAt: string;
+				updatedAt: string;
+			}>,
+		) => {
+			todos = newTodos;
+		},
+	};
+});
+
+// Import the mocked functions to access helpers
+import * as storage from '../../../src/gadgets/todo/storage.js';
+
+describe('TodoUpsert', () => {
+	let gadget: TodoUpsert;
+
+	beforeEach(() => {
+		gadget = new TodoUpsert();
+		// Start each test from an empty in-memory todo list
+		(storage as unknown as { _resetTodos: () => void })._resetTodos();
+	});
+
+	afterEach(() => {
+		vi.clearAllMocks();
+	});
+
+	describe('gadget metadata', () => {
+		it('has correct name', () => {
+			expect(gadget.name).toBe('TodoUpsert');
+		});
+
+		it('has description mentioning create or update', () => {
+			expect(gadget.description).toContain('Create or update');
+		});
+
+		it('description mentions TodoUpdateStatus for status changes', () => {
+			expect(gadget.description).toContain('TodoUpdateStatus');
+		});
+	});
+
+	describe('mixed mode validation', () => {
+		it('throws error when combining id with items array', () => {
+			expect(() =>
+				gadget.execute({
+					id: '1',
+					items: [{ content: 'New task' }],
+				}),
+			).toThrow("Cannot combine top-level id/content with 'items' array");
+		});
+
+		it('throws error when combining content with items array', () => {
+			expect(() =>
+				gadget.execute({
+					content: 'Some content',
+					items: [{ content: 'New task' }],
+				}),
+			).toThrow("Cannot combine top-level id/content with 'items' array");
+		});
+
+		it('error message suggests making two separate calls', () => {
+			try {
+				gadget.execute({
+					id: '1',
+					items: [{ content: 'New task' }],
+				});
+				expect.fail('Should have thrown');
+			} catch (error) {
+				expect((error as Error).message).toContain('make two separate calls');
+			}
+		});
+
+		it('allows empty items array with id (single-item mode)', () => {
+			// Seed one existing todo so that id '1' resolves to an update
+			(storage as unknown as { _setTodos: (todos: unknown[]) => void })._setTodos([
+				{ id: '1', content: 'Existing task', status: 'pending', createdAt: '', updatedAt: '' },
+			]);
+
+			// An empty items array must not trigger the mixed-mode error; single-item mode applies
+			const result = gadget.execute({
+				id: '1',
+				content: 'Updated task',
+				items: [],
+			});
+
+			expect(result).toContain('Updated todo #1');
+		});
+	});
+
+	describe('single-item mode', () => {
+		it('creates a new todo with content', () => {
+			const result = gadget.execute({
+				content: 'New task',
+			});
+
+			expect(result).toContain('Created todo #1');
+			expect(storage.saveTodos).toHaveBeenCalled();
+		});
+
+		it('new todos default to pending status', () => {
+			gadget.execute({
+				content: 'New task',
+			});
+
+			const savedTodos = (storage.saveTodos as ReturnType<typeof vi.fn>).mock.calls[0][0];
+			expect(savedTodos[0].status).toBe('pending');
+		});
+
+		it('updates existing todo content', () => {
+			(storage as unknown as { _setTodos: (todos: unknown[]) => void })._setTodos([
+				{ id: '1', content: 'Existing task', status: 'pending', createdAt: '', updatedAt: '' },
+			]);
+
+			const result = gadget.execute({
+				id: '1',
+				content: 'Updated task',
+			});
+
+			expect(result).toContain('Updated todo #1');
+		});
+
+		it('throws error when creating without content', () => {
+			expect(() => gadget.execute({})).toThrow("'content' is required when creating a new todo.");
+		});
+
+		it('throws error when creating with id but no content', () => {
+			expect(() => gadget.execute({ id: '99' })).toThrow(
+				"'content' is required when creating a new todo.",
+			);
+		});
+	});
+
+	describe('batch mode', () => {
+		it('creates multiple todos at once', () => {
+			const result = gadget.execute({
+				items: [{ content: 'Task 1' }, { content: 'Task 2' }, { content: 'Task 3' }],
+			});
+
+			expect(result).toContain('Created 3 todos');
+			expect(storage.saveTodos).toHaveBeenCalled();
+		});
+
+		it('batch created todos default to pending status', () => {
+			gadget.execute({
+				items: [{ content: 'Task 1' }, { content: 'Task 2' }],
+			});
+
+			const savedTodos = (storage.saveTodos as ReturnType<typeof vi.fn>).mock.calls[0][0];
+			expect(savedTodos[0].status).toBe('pending');
+			expect(savedTodos[1].status).toBe('pending');
+		});
+	});
+});
+
+describe('TodoUpdateStatus', () => {
+	let gadget: TodoUpdateStatus;
+
+	beforeEach(() => {
+		gadget = new TodoUpdateStatus();
+		// Start each test from an empty in-memory todo list
+		(storage as unknown as { _resetTodos: () => void })._resetTodos();
+	});
+
+	afterEach(() => {
+		vi.clearAllMocks();
+	});
+
+	describe('gadget metadata', () => {
+		it('has correct name', () => {
+			expect(gadget.name).toBe('TodoUpdateStatus');
+		});
+
+		it('has description mentioning status update', () => {
+			expect(gadget.description).toContain('status');
+		});
+	});
+
+	describe('status updates', () => {
+		it('updates status of existing todo to in_progress', () => {
+			(storage as unknown as { _setTodos: (todos: unknown[]) => void })._setTodos([
+				{ id: '1', content: 'Existing task', status: 'pending', createdAt: '', updatedAt: '' },
+			]);
+
+			const result = gadget.execute({
+				id: '1',
+				status: 'in_progress',
+			});
+
+			expect(result).toContain('Updated todo #1');
+			expect(result).toContain('in_progress');
+			expect(storage.saveTodos).toHaveBeenCalled();
+		});
+
+		it('updates status of existing todo to done', () => {
+			(storage as unknown as { _setTodos: (todos: unknown[]) => void })._setTodos([
+				{ id: '1', content: 'Existing task', status: 'in_progress', createdAt: '', updatedAt: '' },
+			]);
+
+			const result = gadget.execute({
+				id: '1',
+				status: 'done',
+			});
+
+			expect(result).toContain('Updated todo #1');
+			expect(result).toContain('done');
+		});
+
+		it('updates updatedAt timestamp', () => {
+			(storage as unknown as { _setTodos: (todos: unknown[]) => void })._setTodos([
+				{ id: '1', content: 'Existing task', status: 'pending', createdAt: '', updatedAt: '' },
+			]);
+
+			gadget.execute({
+				id: '1',
+				status: 'done',
+			});
+
+			const savedTodos = (storage.saveTodos as ReturnType<typeof vi.fn>).mock.calls[0][0];
+			expect(savedTodos[0].updatedAt).not.toBe('');
+		});
+	});
+
+	describe('error handling', () => {
+		it('throws error for non-existent todo', () => {
+			expect(() =>
+				gadget.execute({
+					id: '99',
+					status: 'done',
+				}),
+			).toThrow('Todo #99 not found. Use TodoUpsert to create todos first.');
+		});
+
+		it('throws error when todo list is empty', () => {
+			expect(() =>
+				gadget.execute({
+					id: '1',
+					status: 'in_progress',
+				}),
+			).toThrow('Todo #1 not found');
+		});
+	});
+});