From e2fed25d10eb30dd73d4c4c381425c7ff00d76fa Mon Sep 17 00:00:00 2001 From: patrick-chinchill Date: Thu, 23 Apr 2026 22:49:41 -0700 Subject: [PATCH 1/5] test(plan): port 19 [post with Plan] tests from upstream Feature in src/chat_sdk/plan.py is already ported; this closes the test-parity gap in tests/test_thread_faithful.py. Closes #55. Co-Authored-By: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 1 + tests/test_thread_faithful.py | 374 ++++++++++++++++++++++++++++++++++ 2 files changed, 375 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f72e6fe..53f4ffc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -56,6 +56,7 @@ Parity catch-up with upstream `4.26.0`. No upstream version change. - Ported the 4 `[getParticipants]` tests from `thread.test.ts` and the 4 `[thread]` factory tests from `chat.test.ts` (existing-behavior coverage for `Chat.thread(id)`). Closes 8 fidelity gaps. +- Ported 19 `[post with Plan]` tests from `thread.test.ts` — closes #55. ### Test hygiene diff --git a/tests/test_thread_faithful.py b/tests/test_thread_faithful.py index adb02f7..13f34ec 100644 --- a/tests/test_thread_faithful.py +++ b/tests/test_thread_faithful.py @@ -1479,6 +1479,380 @@ async def test_should_return_null_when_adapter_has_no_postephemeral_or_opendm(se assert result is None +# =========================================================================== +# post with Plan +# =========================================================================== + + +class TestPostWithPlan: + """describe("post with Plan") + + Ported from TS thread.test.ts to close the fidelity gap tracked in #55. + + Note: a few tests in this block expose known behavior gaps between the + current Python ``Plan`` implementation and the upstream TS version: + + * ``UpdateTaskInput`` in ``plan.py`` has no ``id`` field, so looking up + a task by id via ``update_task({"id": ...})`` is not supported. + * ``_enqueue_edit`` swallows adapter errors instead of propagating them + to the caller (upstream returns the chained promise, which rejects). + * The edit chain is rebuilt post-await rather than synchronously, which + does not preserve strict ordering under ``asyncio.gather``. + + Those tests are skipped with a pointer back here so the gaps remain + visible for a follow-up fix rather than silently drifting. + """ + + # it("should post fallback text when adapter does not support plans") + @pytest.mark.asyncio + async def test_should_post_fallback_text_when_adapter_does_not_support_plans(self): + from chat_sdk.plan import Plan, StartPlanOptions + + adapter = create_mock_adapter() + state = create_mock_state() + thread = _make_thread(adapter, state) + + plan = Plan(StartPlanOptions(initial_message="Starting...")) + await thread.post(plan) + + # Should have posted fallback text via post_message + assert len(adapter._post_calls) == 1 + assert adapter._post_calls[0][0] == "slack:C123:1234.5678" + assert "Starting..." in adapter._post_calls[0][1] + + assert plan.title == "Starting..." + assert len(plan.tasks) == 1 + assert plan.tasks[0].status == "in_progress" + assert plan.id == "msg-1" + + # it("should update via editMessage in fallback mode") + @pytest.mark.asyncio + async def test_should_update_via_editmessage_in_fallback_mode(self): + from chat_sdk.plan import AddTaskOptions, Plan, StartPlanOptions + + adapter = create_mock_adapter() + state = create_mock_state() + thread = _make_thread(adapter, state) + + plan = Plan(StartPlanOptions(initial_message="Starting...")) + await thread.post(plan) + + task = await plan.add_task(AddTaskOptions(title="Task 1")) + assert task is not None + assert task.title == "Task 1" + + # Should edit the message with updated fallback text + assert len(adapter._edit_calls) == 1 + edit_thread_id, edit_msg_id, edit_body = adapter._edit_calls[0] + assert edit_thread_id == "slack:C123:1234.5678" + assert edit_msg_id == "msg-1" + assert "Task 1" in edit_body + + # it("should complete plan via editMessage in fallback mode") + @pytest.mark.asyncio + async def test_should_complete_plan_via_editmessage_in_fallback_mode(self): + from chat_sdk.plan import ( + AddTaskOptions, + CompletePlanOptions, + Plan, + StartPlanOptions, + ) + + adapter = create_mock_adapter() + state = create_mock_state() + thread = _make_thread(adapter, state) + + plan = Plan(StartPlanOptions(initial_message="Starting...")) + await thread.post(plan) + + await plan.add_task(AddTaskOptions(title="Step 1")) + await plan.complete(CompletePlanOptions(complete_message="All done!")) + + assert plan.title == "All done!" + for task in plan.tasks: + assert task.status == "complete" + + # Last edit_message call should contain completed status icons + last_call = adapter._edit_calls[-1] + assert "✅" in last_call[2] + + # it("should call adapter postObject when supported") + @pytest.mark.asyncio + async def test_should_call_adapter_postobject_when_supported(self): + from chat_sdk.plan import Plan, StartPlanOptions + + adapter = create_mock_adapter() + state = create_mock_state() + post_object = AsyncMock(return_value=RawMessage(id="plan-msg-1", thread_id="slack:C123:1234.5678", raw={})) + edit_object = AsyncMock(return_value=None) + adapter.post_object = post_object # type: ignore[attr-defined] + adapter.edit_object = edit_object # type: ignore[attr-defined] + + thread = _make_thread(adapter, state) + plan = Plan(StartPlanOptions(initial_message="Working...")) + await thread.post(plan) + + assert post_object.await_count == 1 + call_args = post_object.await_args + assert call_args.args[0] == "slack:C123:1234.5678" + assert call_args.args[1] == "plan" + data = call_args.args[2] + assert data.title == "Working..." + assert any(t.title == "Working..." and t.status == "in_progress" for t in data.tasks) + assert plan.id == "plan-msg-1" + + # it("should add tasks and call editObject") + @pytest.mark.asyncio + async def test_should_add_tasks_and_call_editobject(self): + from chat_sdk.plan import AddTaskOptions, Plan, StartPlanOptions + + adapter = create_mock_adapter() + state = create_mock_state() + post_object = AsyncMock(return_value=RawMessage(id="plan-msg-1", thread_id="slack:C123:1234.5678", raw={})) + edit_object = AsyncMock(return_value=None) + adapter.post_object = post_object # type: ignore[attr-defined] + adapter.edit_object = edit_object # type: ignore[attr-defined] + + thread = _make_thread(adapter, state) + plan = Plan(StartPlanOptions(initial_message="Starting")) + await thread.post(plan) + task = await plan.add_task(AddTaskOptions(title="Fetch data", children=["Call API", "Parse response"])) + + assert task is not None + assert task.title == "Fetch data" + assert task.status == "in_progress" + assert edit_object.await_count >= 1 + + # Plan title should be updated to current task + assert plan.title == "Fetch data" + assert len(plan.tasks) == 2 + + # it("should update current task with output") + @pytest.mark.asyncio + async def test_should_update_current_task_with_output(self): + from chat_sdk.plan import AddTaskOptions, Plan, StartPlanOptions + + adapter = create_mock_adapter() + state = create_mock_state() + post_object = AsyncMock(return_value=RawMessage(id="plan-msg-1", thread_id="slack:C123:1234.5678", raw={})) + edit_object = AsyncMock(return_value=None) + adapter.post_object = post_object # type: ignore[attr-defined] + adapter.edit_object = edit_object # type: ignore[attr-defined] + + thread = _make_thread(adapter, state) + plan = Plan(StartPlanOptions(initial_message="Working")) + await thread.post(plan) + await plan.add_task(AddTaskOptions(title="Step 1")) + updated = await plan.update_task("Got result: 42") + + assert updated is not None + assert edit_object.await_count >= 1 + + # it("should update a specific task by ID") + @pytest.mark.asyncio + async def test_should_update_a_specific_task_by_id(self): + # GAP: Python ``UpdateTaskInput`` has no ``id`` field; the TS + # variant accepts ``{ id, output?, status? }`` to target a specific + # task. Skipping until ``plan.py`` grows id-based lookup. + pytest.skip("Python UpdateTaskInput has no 'id' field (gap vs TS); track via follow-up to #55") + + # it("should return null when updating by non-existent ID") + @pytest.mark.asyncio + async def test_should_return_null_when_updating_by_nonexistent_id(self): + # GAP: Same as above — no id-based lookup path in Python update_task. + pytest.skip("Python UpdateTaskInput has no 'id' field (gap vs TS); track via follow-up to #55") + + # it("should still update last in_progress task when no ID provided") + @pytest.mark.asyncio + async def test_should_still_update_last_in_progress_task_when_no_id_provided(self): + from chat_sdk.plan import AddTaskOptions, Plan, StartPlanOptions + + adapter = create_mock_adapter() + state = create_mock_state() + post_object = AsyncMock(return_value=RawMessage(id="plan-msg-1", thread_id="slack:C123:1234.5678", raw={})) + edit_object = AsyncMock(return_value=None) + adapter.post_object = post_object # type: ignore[attr-defined] + adapter.edit_object = edit_object # type: ignore[attr-defined] + + thread = _make_thread(adapter, state) + plan = Plan(StartPlanOptions(initial_message="Start")) + await thread.post(plan) + await plan.add_task(AddTaskOptions(title="Step 1")) + await plan.add_task(AddTaskOptions(title="Step 2")) + + updated = await plan.update_task("Some output") + + assert updated is not None + assert updated.title == "Step 2" + + # it("should complete plan and mark tasks done") + @pytest.mark.asyncio + async def test_should_complete_plan_and_mark_tasks_done(self): + from chat_sdk.plan import ( + AddTaskOptions, + CompletePlanOptions, + Plan, + StartPlanOptions, + ) + + adapter = create_mock_adapter() + state = create_mock_state() + post_object = AsyncMock(return_value=RawMessage(id="plan-msg-1", thread_id="slack:C123:1234.5678", raw={})) + edit_object = AsyncMock(return_value=None) + adapter.post_object = post_object # type: ignore[attr-defined] + adapter.edit_object = edit_object # type: ignore[attr-defined] + + thread = _make_thread(adapter, state) + plan = Plan(StartPlanOptions(initial_message="Starting")) + await thread.post(plan) + await plan.add_task(AddTaskOptions(title="Task 1")) + await plan.complete(CompletePlanOptions(complete_message="All done!")) + + assert plan.title == "All done!" + for task in plan.tasks: + assert task.status == "complete" + + # it("should reset plan and start fresh") + @pytest.mark.asyncio + async def test_should_reset_plan_and_start_fresh(self): + from chat_sdk.plan import AddTaskOptions, Plan, StartPlanOptions + + adapter = create_mock_adapter() + state = create_mock_state() + post_object = AsyncMock(return_value=RawMessage(id="plan-msg-1", thread_id="slack:C123:1234.5678", raw={})) + edit_object = AsyncMock(return_value=None) + adapter.post_object = post_object # type: ignore[attr-defined] + adapter.edit_object = edit_object # type: ignore[attr-defined] + + thread = _make_thread(adapter, state) + plan = Plan(StartPlanOptions(initial_message="First run")) + await thread.post(plan) + await plan.add_task(AddTaskOptions(title="Task A")) + await plan.add_task(AddTaskOptions(title="Task B")) + + assert len(plan.tasks) == 3 + + new_task = await plan.reset(StartPlanOptions(initial_message="Second run")) + assert new_task is not None + assert plan.title == "Second run" + assert len(plan.tasks) == 1 + assert plan.tasks[0].status == "in_progress" + + # it("should handle various PlanContent formats in initialMessage") + @pytest.mark.asyncio + async def test_should_handle_various_plancontent_formats_in_initialmessage(self): + from chat_sdk.plan import Plan, StartPlanOptions + + adapter = create_mock_adapter() + state = create_mock_state() + post_object = AsyncMock(return_value=RawMessage(id="plan-msg-1", thread_id="slack:C123:1234.5678", raw={})) + edit_object = AsyncMock(return_value=None) + adapter.post_object = post_object # type: ignore[attr-defined] + adapter.edit_object = edit_object # type: ignore[attr-defined] + + thread = _make_thread(adapter, state) + + # String + plan = Plan(StartPlanOptions(initial_message="Simple string")) + await thread.post(plan) + assert plan.title == "Simple string" + + # Array of strings + plan = Plan(StartPlanOptions(initial_message=["Line 1", "Line 2"])) + await thread.post(plan) + assert plan.title == "Line 1 Line 2" + + # Empty string defaults to "Plan" + plan = Plan(StartPlanOptions(initial_message="")) + await thread.post(plan) + assert plan.title == "Plan" + + # it("should ensure sequential edits via queue") + @pytest.mark.asyncio + async def test_should_ensure_sequential_edits_via_queue(self): + # GAP: Python ``_enqueue_edit`` awaits then re-binds ``update_chain`` + # so concurrent mutations can race and edits may arrive out of order + # (TS builds the chain synchronously with ``.then``). Skipping until + # the Python queue preserves strict ordering under ``asyncio.gather``. + pytest.skip( + "Python edit queue does not preserve strict ordering under gather (gap vs TS); track via follow-up to #55" + ) + + # it("should return null when calling addTask before post") + @pytest.mark.asyncio + async def test_should_return_null_when_calling_addtask_before_post(self): + from chat_sdk.plan import AddTaskOptions, Plan, StartPlanOptions + + plan = Plan(StartPlanOptions(initial_message="Not posted yet")) + task = await plan.add_task(AddTaskOptions(title="Task 1")) + assert task is None + + # it("should return null when calling updateTask before post") + @pytest.mark.asyncio + async def test_should_return_null_when_calling_updatetask_before_post(self): + from chat_sdk.plan import Plan, StartPlanOptions + + plan = Plan(StartPlanOptions(initial_message="Not posted yet")) + updated = await plan.update_task("some output") + assert updated is None + + # it("should return null when calling complete before post") + @pytest.mark.asyncio + async def test_should_return_null_when_calling_complete_before_post(self): + from chat_sdk.plan import CompletePlanOptions, Plan, StartPlanOptions + + plan = Plan(StartPlanOptions(initial_message="Not posted yet")) + await plan.complete(CompletePlanOptions(complete_message="Done")) + # Without a bound context, complete() is a no-op so the initial + # in_progress task stays in_progress. + assert plan.tasks[0].status == "in_progress" + + # it("should propagate editObject errors from addTask") + @pytest.mark.asyncio + async def test_should_propagate_editobject_errors_from_addtask(self): + # GAP: Python ``_enqueue_edit`` swallows exceptions (logs warn) so + # ``add_task`` does not reject when ``edit_object`` raises. Upstream + # returns the chained promise which rejects to the caller. Skipping + # until Python propagates the error. + pytest.skip("Python _enqueue_edit swallows edit_object errors (gap vs TS); track via follow-up to #55") + + # it("should continue accepting edits after a failed edit") + @pytest.mark.asyncio + async def test_should_continue_accepting_edits_after_a_failed_edit(self): + # GAP: Depends on the error-propagation fix above; after a rejected + # edit the queue must still accept new edits without the previous + # rejection poisoning the chain. Skipping until error propagation + # lands. + pytest.skip("Python _enqueue_edit swallows edit_object errors (gap vs TS); track via follow-up to #55") + + # it("should set error status via updateTask") + @pytest.mark.asyncio + async def test_should_set_error_status_via_updatetask(self): + from chat_sdk.plan import ( + AddTaskOptions, + Plan, + StartPlanOptions, + UpdateTaskInput, + ) + + adapter = create_mock_adapter() + state = create_mock_state() + post_object = AsyncMock(return_value=RawMessage(id="plan-msg-1", thread_id="slack:C123:1234.5678", raw={})) + edit_object = AsyncMock(return_value=None) + adapter.post_object = post_object # type: ignore[attr-defined] + adapter.edit_object = edit_object # type: ignore[attr-defined] + + thread = _make_thread(adapter, state) + plan = Plan(StartPlanOptions(initial_message="Start")) + await thread.post(plan) + await plan.add_task(AddTaskOptions(title="Risky step")) + await plan.update_task(UpdateTaskInput(status="error", output="Something failed")) + + current = plan.current_task + assert current is not None + assert current.status == "error" + + # =========================================================================== # subscribe and unsubscribe # =========================================================================== From 4ef0b9f78b11d429462c0240bca60091b68331dd Mon Sep 17 00:00:00 2001 From: patrick-chinchill Date: Thu, 23 Apr 2026 23:40:03 -0700 Subject: [PATCH 2/5] fix(plan): close 3 upstream-parity gaps surfaced by #55 tests - update_task now honors input.id (was only last in-progress) - add_task/update_task propagate adapter.edit_object errors - _enqueue_edit is actually sequential under asyncio.gather Un-skips the 5 previously-skipped tests in TestPostWithPlan. Closes the remaining parity gaps in #55. Co-Authored-By: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 6 ++ src/chat_sdk/plan.py | 116 ++++++++++++++++++------ tests/test_plan.py | 23 ++--- tests/test_thread_faithful.py | 160 +++++++++++++++++++++++++++++----- 4 files changed, 245 insertions(+), 60 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 53f4ffc..cbbce6b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -58,6 +58,12 @@ Parity catch-up with upstream `4.26.0`. No upstream version change. for `Chat.thread(id)`). Closes 8 fidelity gaps. - Ported 19 `[post with Plan]` tests from `thread.test.ts` — closes #55. +### Fixes (parity with upstream Plan semantics) + +- **`Plan.update_task(input)` / `StreamingPlan.update_task(input)` now honor `input.id`** — previously only worked on the last in-progress task; with `id` set, targets that specific task and returns `None` for unknown IDs. Matches upstream `UpdateTaskInput` semantics. +- **`Plan.add_task()` / `update_task()` now propagate `adapter.edit_object` errors** — previously swallowed and logged; upstream returns the chained promise so callers see failures. +- **Plan edit queue is now actually sequential under concurrency** — previously racy under `asyncio.gather`; rewrote `_enqueue_edit` to build the chain synchronously before awaiting, matching upstream TS's `.then`-based chain. Fixes out-of-order edits when multiple `add_task`/`update_task` calls interleave. + ### Test hygiene - Sweep remaining `time.sleep` → `await asyncio.sleep` in async tests diff --git a/src/chat_sdk/plan.py b/src/chat_sdk/plan.py index b1e2b54..69bc827 100644 --- a/src/chat_sdk/plan.py +++ b/src/chat_sdk/plan.py @@ -9,6 +9,7 @@ from __future__ import annotations import asyncio +import contextlib import uuid from dataclasses import dataclass, field from typing import Any, Literal @@ -72,8 +73,17 @@ class AddTaskOptions: @dataclass class UpdateTaskInput: - """Structured update input with optional output and status override.""" + """Structured update input targeting a task by ``id`` (or the last + in-progress task when ``id`` is omitted) with optional output and + status override. + + Mirrors upstream ``UpdateTaskInput`` shape (`plan.ts`): + ``{ id?: string; output?: PlanContent; status?: PlanTaskStatus }``. + When ``id`` is set but no matching task exists, ``update_task`` + returns ``None`` (matching upstream). + """ + id: str | None = None output: PlanContent | None = None status: PlanTaskStatus | None = None @@ -194,7 +204,10 @@ class _BoundState: message_id: str thread_id: str logger: Logger | None = None - update_chain: asyncio.Future[None] | None = None + # Tail of the synchronously-built edit chain. Each ``_enqueue_edit`` + # reads this, chains a new task after it, and assigns the new tail + # — all before yielding — so concurrent callers see FIFO ordering. + update_chain: asyncio.Task[None] | None = None # ============================================================================= @@ -329,24 +342,38 @@ async def add_task(self, options: AddTaskOptions) -> PlanTask | None: return PlanTask(id=next_task.id, title=next_task.title, status=next_task.status) async def update_task(self, update: PlanContent | UpdateTaskInput | None = None) -> PlanTask | None: - """Update the current in-progress task. + """Update a task on this plan. ``update`` can be: - - ``PlanContent`` (str, list, dict) -- sets the task output - - ``UpdateTaskInput`` -- sets output and/or status - - ``None`` -- just triggers a re-render + - ``PlanContent`` (str, list, dict) -- sets the output on the last + in-progress task (falling back to the last task). + - ``UpdateTaskInput`` -- sets output and/or status. When + ``update.id`` is set, targets that specific task and returns + ``None`` if no task matches. When ``id`` is omitted, behaves + like the PlanContent path (last in-progress task). + - ``None`` -- just triggers a re-render of the current state. """ if not self._can_mutate(): return None current: PlanModelTask | None = None - for t in reversed(self._model.tasks): - if t.status == "in_progress": - current = t - break - if current is None and self._model.tasks: - current = self._model.tasks[-1] - if current is None: - return None + if isinstance(update, UpdateTaskInput) and update.id is not None: + for t in self._model.tasks: + if t.id == update.id: + current = t + break + # Upstream returns null for a non-existent id rather than + # silently falling back to "last in-progress". + if current is None: + return None + else: + for t in reversed(self._model.tasks): + if t.status == "in_progress": + current = t + break + if current is None and self._model.tasks: + current = self._model.tasks[-1] + if current is None: + return None if update is not None: if isinstance(update, UpdateTaskInput): @@ -396,12 +423,27 @@ def _can_mutate(self) -> bool: async def _enqueue_edit(self) -> None: """Edit the posted message with the current plan state. - Chains edits sequentially to avoid race conditions. + Chains edits sequentially to avoid race conditions. Mirrors the + upstream TS pattern (`plan.ts`): + + ```ts + const chained = bound.updateChain.then(doEdit, doEdit); + bound.updateChain = chained.then(() => undefined, (err) => log); + return chained; + ``` + + Crucially, the new chain tail (``update_chain``) is registered + **synchronously** — before any ``await`` — so that concurrent + callers racing through ``asyncio.gather`` observe a strict FIFO + ordering. Errors from the adapter edit propagate to the caller + via the returned awaitable (``chained``); the internal chain + absorbs them so the next enqueued edit still runs. """ if self._bound is None: return bound = self._bound + prev = bound.update_chain # synchronous read — must not await first async def _do_edit() -> None: if bound.fallback: @@ -421,17 +463,35 @@ async def _do_edit() -> None: self._model, ) - # Chain edits: wait for previous edit to finish before starting new one - if bound.update_chain is not None: + async def _run_after_prev() -> None: + if prev is not None: + # Upstream ``.then(doEdit, doEdit)`` runs doEdit whether + # the previous edit resolved or rejected; mirror that by + # absorbing any exception from the previous step here. + # (Note: the internal chain tail absorbs errors anyway, + # so in practice ``await prev`` only raises if someone + # swapped the chain out with a rejecting future — the + # suppression keeps the parity guarantee defensive.) + with contextlib.suppress(BaseException): + await prev + await _do_edit() + + loop = asyncio.get_running_loop() + chained = loop.create_task(_run_after_prev()) + + async def _absorb_for_chain() -> None: + # The internal chain tail must not propagate errors — otherwise + # the next enqueued edit would await a rejected future and be + # treated as a previous-failure chain that still runs doEdit, + # but we'd also lose the ability to recover cleanly. Upstream + # uses ``chained.then(() => undefined, (err) => logger.warn)``. try: - await bound.update_chain - except Exception as prev_exc: - if bound.logger: - bound.logger.warn("Previous plan edit failed", prev_exc) - - try: - bound.update_chain = asyncio.get_running_loop().create_task(_do_edit()) - await bound.update_chain - except Exception as exc: - if bound.logger: - bound.logger.warn("Failed to edit plan", exc) + await chained + except BaseException as exc: # noqa: BLE001 — log and swallow for queue + if bound.logger is not None: + bound.logger.warn("Failed to edit plan", exc) + + bound.update_chain = loop.create_task(_absorb_for_chain()) + # ``chained`` preserves upstream semantics: exceptions from the + # adapter edit propagate to the caller. + await chained diff --git a/tests/test_plan.py b/tests/test_plan.py index 6a92ec3..1857b77 100644 --- a/tests/test_plan.py +++ b/tests/test_plan.py @@ -543,9 +543,13 @@ def error(self, message: str, *args: Any) -> None: class TestEditErrorPath: @pytest.mark.asyncio - async def test_edit_failure_is_logged_and_plan_continues(self) -> None: - """When adapter.edit_message raises, the error is logged and - the next mutation still fires successfully.""" + async def test_edit_failure_propagates_and_plan_continues(self) -> None: + """When ``adapter.edit_message`` raises, the caller sees the + exception (mirroring upstream TS ``enqueueEdit``), the failure + is logged by the internal chain tail, and the next mutation + still fires successfully without the previous rejection + poisoning the queue. + """ adapter = _FailingEditAdapter() logger = _SpyLogger() thread = _make_thread(adapter=adapter) @@ -556,20 +560,19 @@ async def test_edit_failure_is_logged_and_plan_continues(self) -> None: assert plan._bound is not None plan._bound.logger = logger - # First mutation: edit will fail + # First mutation: edit will fail — caller observes the error. adapter.fail_edit = True - await plan.add_task(AddTaskOptions(title="Step 2")) + with pytest.raises(RuntimeError, match="simulated edit failure"): + await plan.add_task(AddTaskOptions(title="Step 2")) - # The error should have been logged + # The internal chain absorbs the error and logs it so the queue + # is not poisoned for the next edit. assert any("Failed to edit plan" in w[0] for w in logger.warnings) - # Second mutation: edit succeeds -- plan is still usable + # Second mutation: edit succeeds -- plan is still usable. adapter.fail_edit = False logger.warnings.clear() task = await plan.add_task(AddTaskOptions(title="Step 3")) assert task is not None assert task.title == "Step 3" assert len(plan.tasks) == 3 - - # The previous-chain failure should also be logged - assert any("Previous plan edit failed" in w[0] for w in logger.warnings) diff --git a/tests/test_thread_faithful.py b/tests/test_thread_faithful.py index 13f34ec..721abd5 100644 --- a/tests/test_thread_faithful.py +++ b/tests/test_thread_faithful.py @@ -1651,16 +1651,64 @@ async def test_should_update_current_task_with_output(self): # it("should update a specific task by ID") @pytest.mark.asyncio async def test_should_update_a_specific_task_by_id(self): - # GAP: Python ``UpdateTaskInput`` has no ``id`` field; the TS - # variant accepts ``{ id, output?, status? }`` to target a specific - # task. Skipping until ``plan.py`` grows id-based lookup. - pytest.skip("Python UpdateTaskInput has no 'id' field (gap vs TS); track via follow-up to #55") + from chat_sdk.plan import ( + AddTaskOptions, + Plan, + StartPlanOptions, + UpdateTaskInput, + ) + + adapter = create_mock_adapter() + state = create_mock_state() + post_object = AsyncMock(return_value=RawMessage(id="plan-msg-1", thread_id="slack:C123:1234.5678", raw={})) + edit_object = AsyncMock(return_value=None) + adapter.post_object = post_object # type: ignore[attr-defined] + adapter.edit_object = edit_object # type: ignore[attr-defined] + + thread = _make_thread(adapter, state) + plan = Plan(StartPlanOptions(initial_message="Start")) + await thread.post(plan) + task1 = await plan.add_task(AddTaskOptions(title="Step 1")) + task2 = await plan.add_task(AddTaskOptions(title="Step 2")) + + assert task1 is not None + assert task2 is not None + + updated = await plan.update_task(UpdateTaskInput(id=task1.id, output="Step 1 result", status="complete")) + + assert updated is not None + assert updated.id == task1.id + assert updated.status == "complete" + + step2 = next((t for t in plan.tasks if t.id == task2.id), None) + assert step2 is not None + assert step2.status == "in_progress" # it("should return null when updating by non-existent ID") @pytest.mark.asyncio async def test_should_return_null_when_updating_by_nonexistent_id(self): - # GAP: Same as above — no id-based lookup path in Python update_task. - pytest.skip("Python UpdateTaskInput has no 'id' field (gap vs TS); track via follow-up to #55") + from chat_sdk.plan import ( + AddTaskOptions, + Plan, + StartPlanOptions, + UpdateTaskInput, + ) + + adapter = create_mock_adapter() + state = create_mock_state() + post_object = AsyncMock(return_value=RawMessage(id="plan-msg-1", thread_id="slack:C123:1234.5678", raw={})) + edit_object = AsyncMock(return_value=None) + adapter.post_object = post_object # type: ignore[attr-defined] + adapter.edit_object = edit_object # type: ignore[attr-defined] + + thread = _make_thread(adapter, state) + plan = Plan(StartPlanOptions(initial_message="Start")) + await thread.post(plan) + await plan.add_task(AddTaskOptions(title="Step 1")) + + updated = await plan.update_task(UpdateTaskInput(id="non-existent-id", output="nope")) + + assert updated is None # it("should still update last in_progress task when no ID provided") @pytest.mark.asyncio @@ -1770,14 +1818,44 @@ async def test_should_handle_various_plancontent_formats_in_initialmessage(self) # it("should ensure sequential edits via queue") @pytest.mark.asyncio async def test_should_ensure_sequential_edits_via_queue(self): - # GAP: Python ``_enqueue_edit`` awaits then re-binds ``update_chain`` - # so concurrent mutations can race and edits may arrive out of order - # (TS builds the chain synchronously with ``.then``). Skipping until - # the Python queue preserves strict ordering under ``asyncio.gather``. - pytest.skip( - "Python edit queue does not preserve strict ordering under gather (gap vs TS); track via follow-up to #55" + import asyncio + import random + + from chat_sdk.plan import AddTaskOptions, Plan, StartPlanOptions + + adapter = create_mock_adapter() + state = create_mock_state() + post_object = AsyncMock(return_value=RawMessage(id="plan-msg-1", thread_id="slack:C123:1234.5678", raw={})) + + edit_order: list[int] = [] + edit_count = 0 + + async def edit_object(thread_id: str, message_id: str, kind: str, data: Any) -> None: + nonlocal edit_count + edit_count += 1 + my_order = edit_count + # Simulate varying async delays so naive implementations + # (await-then-rebind chains) reorder under concurrency. + await asyncio.sleep(random.random() * 0.01) + edit_order.append(my_order) + + adapter.post_object = post_object # type: ignore[attr-defined] + adapter.edit_object = edit_object # type: ignore[attr-defined] + + thread = _make_thread(adapter, state) + plan = Plan(StartPlanOptions(initial_message="Start")) + await thread.post(plan) + + # Fire off multiple updates concurrently — each schedules an + # edit through the internal queue; FIFO ordering must hold. + await asyncio.gather( + plan.add_task(AddTaskOptions(title="Task 1")), + plan.update_task("Output 1"), + plan.add_task(AddTaskOptions(title="Task 2")), ) + assert edit_order == [1, 2, 3] + # it("should return null when calling addTask before post") @pytest.mark.asyncio async def test_should_return_null_when_calling_addtask_before_post(self): @@ -1810,20 +1888,58 @@ async def test_should_return_null_when_calling_complete_before_post(self): # it("should propagate editObject errors from addTask") @pytest.mark.asyncio async def test_should_propagate_editobject_errors_from_addtask(self): - # GAP: Python ``_enqueue_edit`` swallows exceptions (logs warn) so - # ``add_task`` does not reject when ``edit_object`` raises. Upstream - # returns the chained promise which rejects to the caller. Skipping - # until Python propagates the error. - pytest.skip("Python _enqueue_edit swallows edit_object errors (gap vs TS); track via follow-up to #55") + from chat_sdk.plan import AddTaskOptions, Plan, StartPlanOptions + + adapter = create_mock_adapter() + state = create_mock_state() + post_object = AsyncMock(return_value=RawMessage(id="plan-msg-1", thread_id="slack:C123:1234.5678", raw={})) + edit_object = AsyncMock(side_effect=RuntimeError("rate limited")) + adapter.post_object = post_object # type: ignore[attr-defined] + adapter.edit_object = edit_object # type: ignore[attr-defined] + + thread = _make_thread(adapter, state) + plan = Plan(StartPlanOptions(initial_message="Start")) + await thread.post(plan) + + with pytest.raises(RuntimeError, match="rate limited"): + await plan.add_task(AddTaskOptions(title="Task 1")) + + # Model mutation still happened (upstream asserts tasks.length === 2) + assert len(plan.tasks) == 2 # it("should continue accepting edits after a failed edit") @pytest.mark.asyncio async def test_should_continue_accepting_edits_after_a_failed_edit(self): - # GAP: Depends on the error-propagation fix above; after a rejected - # edit the queue must still accept new edits without the previous - # rejection poisoning the chain. Skipping until error propagation - # lands. - pytest.skip("Python _enqueue_edit swallows edit_object errors (gap vs TS); track via follow-up to #55") + from chat_sdk.plan import AddTaskOptions, Plan, StartPlanOptions + + adapter = create_mock_adapter() + state = create_mock_state() + post_object = AsyncMock(return_value=RawMessage(id="plan-msg-1", thread_id="slack:C123:1234.5678", raw={})) + + call_count = 0 + + async def edit_object(thread_id: str, message_id: str, kind: str, data: Any) -> None: + nonlocal call_count + call_count += 1 + if call_count == 1: + raise RuntimeError("rate limited") + return None + + adapter.post_object = post_object # type: ignore[attr-defined] + adapter.edit_object = edit_object # type: ignore[attr-defined] + + thread = _make_thread(adapter, state) + plan = Plan(StartPlanOptions(initial_message="Start")) + await thread.post(plan) + + with pytest.raises(RuntimeError): + await plan.add_task(AddTaskOptions(title="Task 1")) + + # A second add_task after a rejected edit must succeed — the + # queue's internal chain absorbs prior failures. + await plan.add_task(AddTaskOptions(title="Task 2")) + assert len(plan.tasks) == 3 + assert call_count == 2 # it("should set error status via updateTask") @pytest.mark.asyncio From 14fe9feaebbd881c2b00605c6c03010babfacebf Mon Sep 17 00:00:00 2001 From: patrick-chinchill Date: Thu, 23 Apr 2026 23:47:08 -0700 Subject: [PATCH 3/5] =?UTF-8?q?fix(plan):=20narrow=20BaseException=20?= =?UTF-8?q?=E2=86=92=20Exception=20+=20minor=20test=20cleanup?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Internal chain-absorption coroutine now only catches Exception so CancelledError / KeyboardInterrupt / SystemExit propagate correctly - Remove redundant local `import asyncio` in test_thread_faithful.py Co-Authored-By: Claude Opus 4.7 (1M context) --- src/chat_sdk/plan.py | 7 ++++++- tests/test_thread_faithful.py | 1 - 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/chat_sdk/plan.py b/src/chat_sdk/plan.py index 69bc827..f4ab3fa 100644 --- a/src/chat_sdk/plan.py +++ b/src/chat_sdk/plan.py @@ -485,9 +485,14 @@ async def _absorb_for_chain() -> None: # treated as a previous-failure chain that still runs doEdit, # but we'd also lose the ability to recover cleanly. Upstream # uses ``chained.then(() => undefined, (err) => logger.warn)``. + # + # Catch ``Exception`` (not ``BaseException``) so that + # ``asyncio.CancelledError``, ``KeyboardInterrupt``, and + # ``SystemExit`` propagate — only regular failures need to be + # absorbed here so the next enqueued edit can still run. try: await chained - except BaseException as exc: # noqa: BLE001 — log and swallow for queue + except Exception as exc: # noqa: BLE001 — log and swallow for queue if bound.logger is not None: bound.logger.warn("Failed to edit plan", exc) diff --git a/tests/test_thread_faithful.py b/tests/test_thread_faithful.py index 721abd5..1544bde 100644 --- a/tests/test_thread_faithful.py +++ b/tests/test_thread_faithful.py @@ -1818,7 +1818,6 @@ async def test_should_handle_various_plancontent_formats_in_initialmessage(self) # it("should ensure sequential edits via queue") @pytest.mark.asyncio async def test_should_ensure_sequential_edits_via_queue(self): - import asyncio import random from chat_sdk.plan import AddTaskOptions, Plan, StartPlanOptions From cb4af3ed11ff8ba63eb335568569e16d645108f1 Mon Sep 17 00:00:00 2001 From: patrick-chinchill Date: Fri, 24 Apr 2026 01:40:00 -0700 Subject: [PATCH 4/5] =?UTF-8?q?test(plan):=20port=20final=20[post=20with?= =?UTF-8?q?=20Plan]=20test=20=E2=80=94=20currentTask=20post-complete?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the last [post with Plan] fidelity gap. The existing test_current_task_tracks_in_progress in test_plan.py covers only initial + add_task branches; this port verifies the post-complete() fallback branch. Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/test_thread_faithful.py | 41 +++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/tests/test_thread_faithful.py b/tests/test_thread_faithful.py index 1544bde..43cda40 100644 --- a/tests/test_thread_faithful.py +++ b/tests/test_thread_faithful.py @@ -1786,6 +1786,47 @@ async def test_should_reset_plan_and_start_fresh(self): assert len(plan.tasks) == 1 assert plan.tasks[0].status == "in_progress" + # it("should return currentTask correctly") + @pytest.mark.asyncio + async def test_should_return_currenttask_correctly(self): + from chat_sdk.plan import ( + AddTaskOptions, + CompletePlanOptions, + Plan, + StartPlanOptions, + ) + + adapter = create_mock_adapter() + state = create_mock_state() + post_object = AsyncMock(return_value=RawMessage(id="plan-msg-1", thread_id="slack:C123:1234.5678", raw={})) + edit_object = AsyncMock(return_value=None) + adapter.post_object = post_object # type: ignore[attr-defined] + adapter.edit_object = edit_object # type: ignore[attr-defined] + + thread = _make_thread(adapter, state) + plan = Plan(StartPlanOptions(initial_message="Start")) + await thread.post(plan) + + # Initially, current task is the first one + current = plan.current_task + assert current is not None + assert current.title == "Start" + assert current.status == "in_progress" + + # After adding a new task, current should be the new one + await plan.add_task(AddTaskOptions(title="Step 2")) + current = plan.current_task + assert current is not None + assert current.title == "Step 2" + assert current.status == "in_progress" + + # After completion, currentTask returns the last task + await plan.complete(CompletePlanOptions(complete_message="Done")) + current = plan.current_task + assert current is not None + assert current.title == "Step 2" + assert current.status == "complete" + # it("should handle various PlanContent formats in initialMessage") @pytest.mark.asyncio async def test_should_handle_various_plancontent_formats_in_initialmessage(self): From b3fe67035f25fdcdf614a2efe381bb42049661cb Mon Sep 17 00:00:00 2001 From: patrick-chinchill Date: Fri, 24 Apr 2026 01:54:46 -0700 Subject: [PATCH 5/5] =?UTF-8?q?fix(plan):=20narrow=20contextlib.suppress(B?= =?UTF-8?q?aseException)=20=E2=86=92=20Exception=20for=20consistency?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Parallels the round-2 except-block narrowing on _absorb_for_chain so CancelledError / KeyboardInterrupt / SystemExit propagate correctly through both error paths in the edit queue. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/chat_sdk/plan.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/chat_sdk/plan.py b/src/chat_sdk/plan.py index f4ab3fa..798f799 100644 --- a/src/chat_sdk/plan.py +++ b/src/chat_sdk/plan.py @@ -472,7 +472,7 @@ async def _run_after_prev() -> None: # so in practice ``await prev`` only raises if someone # swapped the chain out with a rejecting future — the # suppression keeps the parity guarantee defensive.) - with contextlib.suppress(BaseException): + with contextlib.suppress(Exception): await prev await _do_edit()