From 703ff8aa7ea4e8c95d7a2280cf6429c5c9c124db Mon Sep 17 00:00:00 2001 From: Sami Rusani Date: Wed, 8 Apr 2026 00:16:31 +0200 Subject: [PATCH] P9-S36: ship OpenClaw adapter --- .ai/active/SPRINT_PACKET.md | 255 +++++------ .../CURRENT_STATE.md | 165 +++++++ .../SPRINT_PACKET.md | 232 ++++++++++ .ai/handoff/CURRENT_STATE.md | 34 +- ARCHITECTURE.md | 26 +- BUILD_REPORT.md | 149 ++++--- README.md | 28 +- REVIEW_REPORT.md | 54 +-- ROADMAP.md | 5 +- RULES.md | 1 + apps/api/src/alicebot_api/openclaw_adapter.py | 418 ++++++++++++++++++ apps/api/src/alicebot_api/openclaw_import.py | 124 ++++++ apps/api/src/alicebot_api/openclaw_models.py | 200 +++++++++ .../ADR-004-openclaw-integration-boundary.md | 50 +++ .../ARCHITECTURE.md | 107 +++++ .../PRODUCT_BRIEF.md | 77 ++++ .../2026-04-07-phase9-bootstrap/README.md | 149 +++++++ .../2026-04-07-phase9-bootstrap/ROADMAP.md | 141 ++++++ .../2026-04-07-phase9-bootstrap/RULES.md | 41 ++ docs/phase9-sprint-33-38-plan.md | 8 +- fixtures/openclaw/workspace_v1.json | 88 ++++ scripts/load_openclaw_sample_data.py | 102 +++++ scripts/load_openclaw_sample_data.sh | 20 + tests/integration/test_openclaw_import.py | 88 ++++ .../test_openclaw_mcp_integration.py | 178 ++++++++ tests/unit/test_openclaw_adapter.py | 99 +++++ 26 files changed, 2579 insertions(+), 260 deletions(-) create mode 100644 .ai/archive/planning/2026-04-07-phase9-bootstrap/CURRENT_STATE.md create mode 100644 .ai/archive/planning/2026-04-07-phase9-bootstrap/SPRINT_PACKET.md create mode 100644 apps/api/src/alicebot_api/openclaw_adapter.py create mode 100644 apps/api/src/alicebot_api/openclaw_import.py create mode 100644 apps/api/src/alicebot_api/openclaw_models.py create mode 100644 docs/adr/ADR-004-openclaw-integration-boundary.md create mode 100644 docs/archive/planning/2026-04-07-phase9-bootstrap/ARCHITECTURE.md create mode 100644 docs/archive/planning/2026-04-07-phase9-bootstrap/PRODUCT_BRIEF.md create mode 100644 docs/archive/planning/2026-04-07-phase9-bootstrap/README.md 
create mode 100644 docs/archive/planning/2026-04-07-phase9-bootstrap/ROADMAP.md create mode 100644 docs/archive/planning/2026-04-07-phase9-bootstrap/RULES.md create mode 100644 fixtures/openclaw/workspace_v1.json create mode 100755 scripts/load_openclaw_sample_data.py create mode 100755 scripts/load_openclaw_sample_data.sh create mode 100644 tests/integration/test_openclaw_import.py create mode 100644 tests/integration/test_openclaw_mcp_integration.py create mode 100644 tests/unit/test_openclaw_adapter.py diff --git a/.ai/active/SPRINT_PACKET.md b/.ai/active/SPRINT_PACKET.md index c66cd61..ed3351b 100644 --- a/.ai/active/SPRINT_PACKET.md +++ b/.ai/active/SPRINT_PACKET.md @@ -2,7 +2,7 @@ ## Sprint Title -Phase 9 Sprint 35 (P9-S35): MCP Server +Phase 9 Sprint 36 (P9-S36): OpenClaw Adapter ## Sprint Type @@ -10,7 +10,7 @@ feature ## Sprint Reason -`P9-S33` established the public-safe `alice-core` package boundary and startup path. `P9-S34` established the deterministic local CLI contract. The next non-redundant seam is exposing that same continuity contract through a narrow MCP server so external assistants can use Alice without reopening core behavior. +`P9-S33` shipped the public-safe `alice-core` boundary and startup path. `P9-S34` shipped the deterministic local CLI contract. `P9-S35` shipped the narrow MCP transport. The next non-redundant seam is proving Alice is agent-agnostic by wiring one concrete external adapter against the already-shipped CLI/MCP continuity contract. 
## Planning Anchors @@ -21,23 +21,24 @@ feature - `docs/adr/ADR-001-public-core-package-boundary.md` - `docs/adr/ADR-002-public-runtime-baseline.md` - `docs/adr/ADR-003-mcp-tool-surface-contract.md` +- `docs/adr/ADR-004-openclaw-integration-boundary.md` if introduced ## Sprint Objective -Ship a small deterministic MCP server for Alice continuity flows so one external MCP-capable client can call capture, recall, resume, open-loop, review, correction, and context-pack tools against the shipped local `alice-core` runtime. +Ship the first OpenClaw adapter path so a sample or real OpenClaw workspace can be imported into Alice, queried through Alice recall/resumption, and optionally consumed through the shipped MCP wedge without changing Alice continuity semantics. ## Git Instructions -- Branch Name: `codex/phase9-sprint-35-mcp-server` +- Branch Name: `codex/phase9-sprint-36-openclaw-adapter` - Base Branch: `main` - PR Strategy: one sprint branch, one PR - Merge Policy: squash merge only after reviewer `PASS` and explicit Control Tower merge approval ## Why This Sprint Matters -- It is the first real interop transport for external assistants. -- It should inherit the already-shipped `P9-S34` CLI semantics instead of inventing a second behavior model. -- It turns Alice from a local tool into a reusable memory layer for external agent clients. +- It is the first proof that Alice works as an interoperable memory layer, not just a standalone local tool. +- It validates the Phase 9 thesis using one real external agent stack instead of abstract compatibility claims. +- It sets the adapter/import boundary ahead of broader importer work in `P9-S37`. ## Redundancy Guard @@ -49,102 +50,101 @@ Ship a small deterministic MCP server for Alice continuity flows so one external - Phase 8 operational chief-of-staff handoff, queue, routing, and outcome-learning seams. - `P9-S33` public-safe packaging, startup path, and sample-data baseline. 
- `P9-S34` deterministic local CLI contract for continuity workflows. -- Required now (`P9-S35`): - - narrow MCP transport for the shipped continuity contract - - deterministic tool schemas and serialization - - one local client interoperability proof - - parity tests between MCP outputs and shipped CLI/core behavior where relevant -- Explicitly out of `P9-S35`: - - OpenClaw adapter implementation - - importer expansion - - hosted auth or remote deployment systems - - widening the tool surface beyond the ADR-defined initial wedge - - reopening `P9-S33` packaging or `P9-S34` CLI semantics unless transport parity exposes a real defect + - `P9-S35` deterministic MCP transport for the shipped continuity contract. +- Required now (`P9-S36`): + - OpenClaw adapter/import boundary + - file-based import path for OpenClaw workspace or durable memory data + - imported provenance tagging and dedupe stance + - recall/resumption proof on imported OpenClaw material + - optional MCP augmentation proof using imported data through the shipped tool surface +- Explicitly out of `P9-S36`: + - broad importer set beyond the OpenClaw adapter path + - widening the MCP tool surface + - hosted deployment or remote auth work + - launch assets / public release polish + - reopening CLI or MCP semantics unless adapter integration exposes a real parity defect ## Design Truth -- MCP is a transport layer over the shipped Alice continuity contract, not a new product surface with different semantics. -- Tool outputs must stay deterministic, provenance-backed, and narrowly scoped. -- External clients should get the same essential behavior as the local CLI for the same dataset and scope. -- The first MCP release should privilege stability and auditability over breadth. +- OpenClaw integration should prove Alice can augment an external agent stack without becoming a generic platform wrapper. 
+- The adapter should map external state into shipped Alice continuity objects with explicit provenance, not bypass Alice’s trust and correction model. +- Imported material should remain queryable through the same recall/resumption semantics as native Alice data. +- The adapter boundary should stay narrow enough that later importer work can generalize from it. ## Exact Surfaces In Scope -- local MCP server entrypoint and runtime wiring -- deterministic tool schemas for the initial ADR-backed tool set -- transport wrappers for shipped continuity flows -- context-pack output where it can be defined directly from shipped continuity seams -- local auth/config model for MCP use on the documented startup path -- docs and examples for one compatible MCP client -- parity and transport tests for the scoped tool set +- OpenClaw import/adapter module(s) +- file-based input contract for OpenClaw workspace or durable memory export +- import mapping into shipped Alice continuity objects +- provenance tagging and dedupe behavior for imported material +- one documented local demo path for import -> recall/resume +- optional MCP augmentation proof against imported data +- tests and fixtures for the adapter path ## Exact Files In Scope - `.ai/active/SPRINT_PACKET.md` - `.ai/handoff/CURRENT_STATE.md` +- `ARCHITECTURE.md` - `README.md` - `ROADMAP.md` -- `pyproject.toml` -- `apps/api/src/alicebot_api/__init__.py` -- `apps/api/src/alicebot_api/config.py` -- `apps/api/src/alicebot_api/mcp_server.py` if introduced -- `apps/api/src/alicebot_api/mcp_tools.py` if introduced -- `apps/api/src/alicebot_api/mcp_models.py` if introduced -- `apps/api/src/alicebot_api/cli.py` if parity-alignment fixes are required -- `apps/api/src/alicebot_api/cli_formatting.py` if parity-alignment fixes are required -- `apps/api/src/alicebot_api/continuity_capture.py` -- `apps/api/src/alicebot_api/continuity_recall.py` -- `apps/api/src/alicebot_api/continuity_resumption.py` -- 
`apps/api/src/alicebot_api/continuity_open_loops.py` -- `apps/api/src/alicebot_api/continuity_review.py` -- `apps/api/src/alicebot_api/chief_of_staff.py` if `alice_context_pack` is implemented through existing brief assembly -- `tests/unit/test_mcp.py` if introduced -- `tests/integration/test_mcp_server.py` if introduced -- `tests/integration/test_mcp_cli_parity.py` if introduced +- `RULES.md` +- `docs/phase9-sprint-33-38-plan.md` +- `pyproject.toml` if adapter packaging entrypoints are introduced +- `apps/api/src/alicebot_api/openclaw_adapter.py` if introduced +- `apps/api/src/alicebot_api/openclaw_models.py` if introduced +- `apps/api/src/alicebot_api/openclaw_import.py` if introduced +- `apps/api/src/alicebot_api/mcp_tools.py` if parity-alignment is required +- `apps/api/src/alicebot_api/continuity_capture.py` if adapter ingestion reuses capture helpers +- `apps/api/src/alicebot_api/continuity_recall.py` if import parity fixes are required +- `apps/api/src/alicebot_api/continuity_resumption.py` if import parity fixes are required +- `apps/api/src/alicebot_api/store.py` +- `scripts/load_openclaw_sample_data.py` if introduced +- `scripts/load_openclaw_sample_data.sh` if introduced +- `fixtures/openclaw/` if introduced +- `docs/adr/ADR-004-openclaw-integration-boundary.md` if introduced +- `.ai/archive/planning/2026-04-07-phase9-bootstrap/` if bootstrap planning state is archived for traceability +- `docs/archive/planning/2026-04-07-phase9-bootstrap/` if canonical planning docs are archived for traceability +- `tests/unit/test_openclaw_adapter.py` if introduced +- `tests/integration/test_openclaw_import.py` if introduced +- `tests/integration/test_openclaw_mcp_integration.py` if introduced - `BUILD_REPORT.md` - `REVIEW_REPORT.md` ## In Scope -- support an initial MCP tool set aligned to ADR-003: - - `alice_capture` - - `alice_recall` - - `alice_resume` - - `alice_open_loops` - - `alice_recent_decisions` - - `alice_recent_changes` - - `alice_memory_review` - - 
`alice_memory_correct` - - `alice_context_pack` -- define deterministic request/response shapes for those tools -- make one local MCP client call recall and resume successfully -- prove correction via MCP changes later retrieval behavior deterministically -- document exact local MCP startup/use path without changing the canonical `P9-S33` runtime flow +- define the first-class OpenClaw adapter boundary +- import a sample or real OpenClaw workspace / durable memory export into Alice +- preserve source provenance on imported material +- make imported memory visible through Alice recall and resumption +- document exact local import and demo steps +- keep MCP augmentation proof limited to using the already-shipped tool surface on imported data ## Out Of Scope -- OpenClaw or other adapters -- broad tool-surface expansion beyond the ADR -- hosted or remote auth systems -- general-purpose agent execution tools -- broad repo restructuring -- replacing CLI as the reference behavior contract +- generic importer framework for all sources +- ChatGPT/Claude/markdown/CSV importer bundle +- MCP tool-surface expansion +- hosted adapter services +- broad repo packaging changes +- public launch polish and release assets ## Required Deliverables -- packaged or runnable local MCP server entrypoint -- deterministic initial tool schemas and handlers -- one compatibility example for a real MCP client -- parity evidence showing MCP reflects shipped Alice continuity behavior -- synced docs and sprint reports +- runnable OpenClaw adapter/import path +- sample or documented real OpenClaw fixture path +- provenance-preserving import mapping +- recall/resumption proof against imported data +- optional MCP proof against imported data if used to validate augmentation mode +- synced docs, reports, and any new ADR boundary needed for the adapter ## Acceptance Criteria -- one MCP-capable client can call `alice_recall` successfully against the local runtime -- one MCP-capable client can call 
`alice_resume` successfully against the local runtime -- correction through `alice_memory_correct` changes a later retrieval/result deterministically -- MCP outputs remain narrow, deterministic, and provenance-backed -- the MCP tool contract is stable enough that `P9-S36` and `P9-S37` can build on it without reopening transport semantics +- a sample or real OpenClaw workspace can be imported through the documented path +- imported material becomes queryable via Alice recall +- imported material contributes useful output to Alice resumption briefs +- imported provenance is explicit enough to distinguish adapter-ingested material from native Alice capture +- if MCP augmentation is exercised, one shipped MCP tool path works successfully against imported data without widening the tool contract ## Required Commands @@ -156,112 +156,115 @@ docker compose up -d ./scripts/load_sample_data.sh ./scripts/api_dev.sh curl -sS http://127.0.0.1:8000/healthz -./.venv/bin/python -m alicebot_api --help ./.venv/bin/python -m pytest tests/unit tests/integration pnpm --dir apps/web test ``` -If a dedicated MCP server entrypoint or local MCP smoke command is introduced this sprint, it must be run and included in review evidence alongside one real client interoperability proof. +If a dedicated OpenClaw import command or adapter loader is introduced this sprint, it must be run and included in review evidence together with at least one recall and one resumption proof against imported data. 
## Required Acceptance Evidence -- exact MCP startup path used during verification -- exact client/config used for interoperability proof -- one successful `alice_recall` tool call -- one successful `alice_resume` tool call -- one successful correction flow showing later retrieval changed deterministically -- note of any intentionally deferred MCP ergonomics or auth concerns +- exact OpenClaw input fixture or workspace path used during verification +- exact import command/path used during verification +- one successful recall example against imported data +- one successful resumption example against imported data +- note of import provenance and dedupe posture actually observed +- if used, one successful shipped MCP tool call against imported data ## Implementation Constraints -- preserve shipped P5/P6/P7/P8/P9-S33/P9-S34 semantics -- keep the MCP surface narrow and ADR-aligned -- keep transport payloads deterministic and easily diffable -- do not introduce unsafe autonomous side effects -- prefer parity with shipped CLI/core behavior over transport cleverness +- preserve shipped P5/P6/P7/P8/P9-S33/P9-S34/P9-S35 semantics +- do not bypass Alice continuity objects or correction semantics for imported data +- keep the adapter narrow and specific to OpenClaw in this sprint +- keep provenance explicit and deterministic +- prefer an auditable import path over a “magic sync” abstraction ## Control Tower Task Cards -### Task 1: MCP Entry and Schemas +### Task 1: Adapter Boundary and Models -Owner: platform/interop owner +Owner: interop/adapter owner Write scope: -- `pyproject.toml` -- `apps/api/src/alicebot_api/__init__.py` -- `apps/api/src/alicebot_api/mcp_server.py` -- `apps/api/src/alicebot_api/mcp_tools.py` -- `apps/api/src/alicebot_api/mcp_models.py` +- `apps/api/src/alicebot_api/openclaw_adapter.py` +- `apps/api/src/alicebot_api/openclaw_models.py` +- `apps/api/src/alicebot_api/openclaw_import.py` +- `docs/adr/ADR-004-openclaw-integration-boundary.md` 
Responsibilities: -- define the runnable MCP server entrypoint -- keep the tool surface narrow and stable -- keep schema names and payloads deterministic -- avoid leaking internal-only helper seams +- define the OpenClaw import boundary +- define supported file/input shapes for the first adapter pass +- keep provenance and dedupe rules explicit +- avoid drifting into generic importer-framework work -### Task 2: Continuity Transport Wiring +### Task 2: Continuity Mapping and Storage Owner: backend/runtime owner Write scope: -- `apps/api/src/alicebot_api/config.py` +- `apps/api/src/alicebot_api/store.py` - `apps/api/src/alicebot_api/continuity_capture.py` - `apps/api/src/alicebot_api/continuity_recall.py` - `apps/api/src/alicebot_api/continuity_resumption.py` -- `apps/api/src/alicebot_api/continuity_open_loops.py` -- `apps/api/src/alicebot_api/continuity_review.py` -- `apps/api/src/alicebot_api/chief_of_staff.py` +- `apps/api/src/alicebot_api/mcp_tools.py` Responsibilities: -- map tool calls directly onto shipped continuity behavior -- expose provenance/trust signals consistently -- keep context-pack behavior grounded in shipped brief assembly -- fix only true parity gaps exposed during transport integration +- map imported OpenClaw material into shipped Alice continuity semantics +- preserve deterministic retrieval/resumption behavior +- expose imported provenance through recall/resumption/MCP where relevant +- fix only true parity gaps exposed by the adapter -### Task 3: Docs and Interop Example +### Task 3: Fixtures, Demo Path, and Docs Owner: docs/integration owner Write scope: +- `ARCHITECTURE.md` - `README.md` - `ROADMAP.md` +- `RULES.md` - `.ai/handoff/CURRENT_STATE.md` +- `docs/phase9-sprint-33-38-plan.md` +- `fixtures/openclaw/` +- `scripts/load_openclaw_sample_data.py` +- `scripts/load_openclaw_sample_data.sh` Responsibilities: -- document exact MCP startup path -- document one compatible local client example -- keep startup/sample-data instructions 
unchanged from `P9-S33` -- make the next seam toward adapters/importers explicit +- provide one reproducible local OpenClaw demo path +- document exact import steps and expected outcomes +- keep startup/sample-data guidance from `P9-S33` unchanged +- keep architecture/rules/planning docs aligned with the shipped adapter boundary and importer posture +- make the next seam toward broader importers/eval explicit -### Task 4: Verification and Parity +### Task 4: Verification and Interop Proof Owner: sprint integrator Write scope: -- `tests/unit/test_mcp.py` -- `tests/integration/test_mcp_server.py` -- `tests/integration/test_mcp_cli_parity.py` +- `tests/unit/test_openclaw_adapter.py` +- `tests/integration/test_openclaw_import.py` +- `tests/integration/test_openclaw_mcp_integration.py` - `BUILD_REPORT.md` - `REVIEW_REPORT.md` Responsibilities: -- prove recall/resume work through a real MCP client path -- prove correction changes later retrieval deterministically -- keep parity evidence explicit against shipped CLI/core behavior -- keep scope hygiene explicit if support files are touched +- prove import works against the documented fixture/workspace shape +- prove recall/resumption work against imported data +- prove any MCP augmentation path stays within the shipped tool contract +- keep scope hygiene explicit if supporting files are touched ## Definition Of Done -- `P9-S35` MCP server exists and is runnable from the documented local install -- the initial ADR-backed tool surface is implemented and deterministic -- one real client interoperability proof exists for recall and resume +- `P9-S36` OpenClaw adapter/import path exists and is runnable from the documented local install +- imported OpenClaw data is queryable through shipped Alice recall/resumption semantics +- provenance and dedupe posture are explicit and reviewable - docs, tests, build report, and review report are aligned -- no adapter or importer work leaks into the sprint +- no broad importer-bundle or 
launch-polish work leaks into the sprint diff --git a/.ai/archive/planning/2026-04-07-phase9-bootstrap/CURRENT_STATE.md b/.ai/archive/planning/2026-04-07-phase9-bootstrap/CURRENT_STATE.md new file mode 100644 index 0000000..fdada5a --- /dev/null +++ b/.ai/archive/planning/2026-04-07-phase9-bootstrap/CURRENT_STATE.md @@ -0,0 +1,165 @@ +# Current State + +## Canonical Truth + +- The canonical baseline remains through Phase 3 Sprint 9. +- Phase 4 release-control layer remains the canonical gate ownership compatibility baseline. +- Active Sprint focus is Phase 4 Sprint 14. +- Active sprint focus is Phase 8 Sprint 32 outcome learning and closure quality. +- Earlier Phase 4 work is already delivered: task-run linkage to approvals/executions, idempotent proxy execution replay guards, approval pause/resume continuity for linked runs, run transition observability, explicit stop reasons, bounded retries with persisted posture, explicit failure classes, and deterministic Phase 4 gate runners. +- Phase 4 Sprint 14-19 release-control/sign-off delivery is shipped baseline, and Phase 5 Sprint 17-20 continuity delivery is shipped baseline. +- Latest shipped post-Phase-5 packet is P6-S24 trust dashboard and quality release evidence. +- P8-S29 chief-of-staff action handoff artifacts are shipped baseline. +- P8-S30 chief-of-staff handoff queue and operational review is shipped baseline. +- P8-S31 chief-of-staff governed execution routing is shipped baseline. +- P8-S32 chief-of-staff outcome learning and closure quality implementation is now in place for the active sprint packet. +- Active post-Phase-7 packet is P8-S32 outcome learning and closure quality. 
+- Phase 8 planning anchors are: + - `docs/phase8-product-spec.md` + - `docs/phase8-sprint-29-32-plan.md` +- The accepted baseline includes deterministic Phase 3 gate entrypoints: `python3 scripts/run_phase3_acceptance.py`, `python3 scripts/run_phase3_readiness_gates.py`, and `python3 scripts/run_phase3_validation_matrix.py` (default go/no-go command). +- Phase 4 gate entrypoints are `python3 scripts/run_phase4_acceptance.py`, `python3 scripts/run_phase4_readiness_gates.py`, and `python3 scripts/run_phase4_validation_matrix.py`. +- Phase 4 release-candidate rehearsal entrypoint is `python3 scripts/run_phase4_release_candidate.py`, which writes latest summary evidence at `artifacts/release/phase4_rc_summary.json` and appends retained archive/index evidence under `artifacts/release/archive/` for repeated-run audit, with deterministic archive index lock path `artifacts/release/archive/index.lock`, bounded lock-timeout failure contract, and atomic index replace writes. +- Archive audit verifier entrypoint is `python3 scripts/verify_phase4_rc_archive.py`. +- Phase 4 MVP exit manifest entrypoints are `python3 scripts/generate_phase4_mvp_exit_manifest.py` and `python3 scripts/verify_phase4_mvp_exit_manifest.py`, producing deterministic closeout evidence at `artifacts/release/phase4_mvp_exit_manifest.json` from latest GO RC archive evidence. +- Phase 4 MVP qualification/sign-off entrypoints are `python3 scripts/run_phase4_mvp_qualification.py` and `python3 scripts/verify_phase4_mvp_signoff_record.py`, producing deterministic qualification evidence at `artifacts/release/phase4_mvp_signoff_record.json` with ordered gate statuses, GO/NO_GO, and blocker registry. +- Gate ownership is canonicalized to Phase 4 runner script names; Phase 3/Phase 2/MVP commands remain supported compatibility entrypoints with identical semantics. 
+- Use [PRODUCT_BRIEF.md](../../PRODUCT_BRIEF.md) for product scope, [ARCHITECTURE.md](../../ARCHITECTURE.md) for implemented boundaries, [ROADMAP.md](../../ROADMAP.md) for forward planning, and [RULES.md](../../RULES.md) for durable operating rules. +- The live sprint reports are [BUILD_REPORT.md](../../BUILD_REPORT.md) and [REVIEW_REPORT.md](../../REVIEW_REPORT.md) at repo root; older accepted sprint history belongs in [docs/archive/sprints](../../docs/archive/sprints), not in this handoff. + +## Implemented Surfaces + +- `apps/api` is the core shipped product surface. It implements continuity, context compilation, assistant responses, typed memory and open-loop seams, deterministic thread resumption brief reads, unified explicit-signal capture seams, policy/tool/approval governance, execution budgets, tasks and task steps, rooted local workspaces and artifacts, artifact chunk retrieval and embeddings, traces, and narrow read-only Gmail and Calendar seams with selected-item ingestion plus bounded Calendar event discovery. +- Phase 7 Sprint 25 + Sprint 26 chief-of-staff implementation is now in place: + - `GET /v0/chief-of-staff` compiles deterministic ranked priorities from shipped continuity and trust seams. + - ranked items include explicit posture label (`urgent`, `important`, `waiting`, `blocked`, `stale`, `defer`), provenance-backed rationale payload, and trust-aware confidence posture. + - recommendation payload includes one deterministic next action with explicit confidence posture and provenance references. + - deterministic follow-through supervision fields are shipped: `overdue_items`, `stale_waiting_for_items`, `slipped_commitments`, `escalation_posture`, and governed `draft_follow_up`. + - each follow-through item includes explicit current priority posture, follow-through posture, deterministic recommendation action (`nudge`, `defer`, `escalate`, `close_loop_candidate`), and rationale. 
+ - `draft_follow_up` is artifact-only and approval-bounded (`mode=draft_only`, `approval_required=true`, `auto_send=false`). +- Phase 7 Sprint 27 chief-of-staff preparation/resumption implementation is now in place: + - `GET /v0/chief-of-staff` now also returns deterministic `preparation_brief`, `what_changed_summary`, `prep_checklist`, `suggested_talking_points`, and `resumption_supervision`. + - preparation and resumption recommendations are provenance-backed and explicitly trust-calibrated. + - low-trust memory posture visibly downgrades preparation/resumption confidence posture. + - `/chief-of-staff` now includes a dedicated preparation panel with rationale and provenance visibility. +- Phase 7 Sprint 28 chief-of-staff weekly review/outcome-learning implementation is now in place: + - `GET /v0/chief-of-staff` now also returns deterministic `weekly_review_brief`, `recommendation_outcomes`, `priority_learning_summary`, and `pattern_drift_summary`. + - weekly-review guidance explicitly ranks `close` / `defer` / `escalate` actions with deterministic rationale and signal counts. + - `POST /v0/chief-of-staff/recommendation-outcomes` captures deterministic recommendation handling outcomes (`accept`, `defer`, `ignore`, `rewrite`) as auditable continuity records. + - `/chief-of-staff` now includes a dedicated weekly review and learning panel with explicit outcome-capture controls and drift visibility. +- Phase 8 Sprint 29 chief-of-staff action-handoff implementation is now in place: + - `GET /v0/chief-of-staff` now also returns deterministic `action_handoff_brief`, `handoff_items`, `task_draft`, `approval_draft`, and explicit `execution_posture`. + - handoff artifacts deterministically map top actionable recommendations from priority/follow-through/preparation/weekly-review seams into governed task/approval-ready draft structures with rationale and provenance. 
+ - execution posture is explicit and non-autonomous (`approval_bounded_artifact_only`, `approval_required=true`, `autonomous_execution=false`, no external side effects). + - `/chief-of-staff` now includes a dedicated action handoff panel with posture visibility and draft artifact review. +- Phase 8 Sprint 30 chief-of-staff handoff-queue and operational-review implementation is now in place: + - `GET /v0/chief-of-staff` now also returns deterministic `handoff_queue_summary`, `handoff_queue_groups`, and `handoff_review_actions`. + - queue lifecycle states are explicit (`ready`, `pending_approval`, `executed`, `stale`, `expired`) with deterministic grouping/ordering posture and explicit stale/expired surfacing. + - `POST /v0/chief-of-staff/handoff-review-actions` captures explicit operator queue lifecycle transitions as auditable continuity records. + - `/chief-of-staff` now includes a dedicated grouped handoff queue panel with explicit review-action controls and review-action history visibility. +- Phase 8 Sprint 31 chief-of-staff governed execution-routing implementation is now in place: + - `GET /v0/chief-of-staff` now also returns deterministic `execution_routing_summary`, `routed_handoff_items`, `routing_audit_trail`, and `execution_readiness_posture`. + - `POST /v0/chief-of-staff/execution-routing-actions` captures explicit routing transitions into governed draft targets (`task_workflow_draft`, `approval_workflow_draft`, `follow_up_draft_only`). + - routing transitions are explicit and auditable (`routed`, `reaffirmed`) while preserving approval-required draft-only posture. + - `/chief-of-staff` now includes a dedicated execution routing panel with readiness posture visibility, route controls, and auditable transition history. 
+- Phase 8 Sprint 32 chief-of-staff outcome-learning and closure-quality implementation is now in place: + - `GET /v0/chief-of-staff` now also returns deterministic `handoff_outcome_summary`, `handoff_outcomes`, `closure_quality_summary`, `conversion_signal_summary`, and `stale_ignored_escalation_posture`. + - `POST /v0/chief-of-staff/handoff-outcomes` captures explicit routed-handoff outcomes (`reviewed`, `approved`, `rejected`, `rewritten`, `executed`, `ignored`, `expired`) as immutable continuity records. + - outcome rollups are deterministic and latest-state driven per handoff item, with explicit closure-quality, conversion-signal, and stale/ignored escalation posture explanations. + - `/chief-of-staff` now includes a dedicated outcome-learning panel with explicit outcome-capture controls and closure/conversion/escalation visibility. +- Phase 5 Sprint 17 adds typed continuity capture seams: + - `POST /v0/continuity/captures` always appends an immutable capture event and conservatively admits typed durable objects. + - `GET /v0/continuity/captures` returns inbox rows with admission posture (`DERIVED`/`TRIAGE`) and optional derived object summary. + - `GET /v0/continuity/captures/{capture_event_id}` returns capture detail with derived object and provenance when admitted. +- Phase 5 Sprint 18 adds continuity recall/resumption seams: + - `GET /v0/continuity/recall` returns provenance-backed scoped recall results with deterministic ordering metadata, confirmation status, and admission posture. + - `GET /v0/continuity/resumption-brief` compiles deterministic brief sections (`last_decision`, `open_loops`, `recent_changes`, `next_action`) with explicit empty states. +- Phase 5 Sprint 19 adds continuity review/correction seams: + - `GET /v0/continuity/review-queue` returns correction-ready continuity objects with deterministic posture filtering. 
+ - `GET /v0/continuity/review-queue/{continuity_object_id}` returns selected-object review detail, correction-event history, and supersession chain links. + - `POST /v0/continuity/review-queue/{continuity_object_id}/corrections` applies deterministic `confirm`/`edit`/`delete`/`supersede`/`mark_stale` actions. + - correction events are append-only and persisted before lifecycle mutation. + - continuity recall/resumption now reflects correction posture immediately, including freshness/supersession metadata (`last_confirmed_at`, `supersedes_object_id`, `superseded_by_object_id`) and deleted-object exclusion from recall payloads. +- Phase 5 Sprint 20 adds deterministic open-loop review and briefing seams: + - `GET /v0/continuity/open-loops` returns grouped posture sections (`waiting_for`, `blocker`, `stale`, `next_action`) with deterministic ordering metadata. + - `GET /v0/continuity/daily-brief` returns deterministic daily sections (`waiting_for_highlights`, `blocker_highlights`, `stale_items`, `next_suggested_action`) with explicit empty states. + - `GET /v0/continuity/weekly-review` returns deterministic grouped sections plus posture rollup counts. + - `POST /v0/continuity/open-loops/{continuity_object_id}/review-action` applies deterministic `done`/`deferred`/`still_blocked` actions with auditable correction-event payload mapping. + - continuity resumption reflects open-loop review-action outcomes immediately. +- Phase 6 Sprint 21 adds canonical memory-quality and deterministic review-priority seams: + - `GET /v0/memories/quality-gate` returns deterministic canonical gate status (`healthy`, `needs_review`, `insufficient_sample`, `degraded`) with precision/sample/risk counts and explicit computation backing fields. 
+ - `GET /v0/memories/review-queue` supports deterministic priority modes (`oldest_first`, `recent_first`, `high_risk_first`, `stale_truth_first`) with explicit summary ordering metadata plus per-item priority posture (`is_high_risk`, `is_stale_truth`, `priority_reason`). + - `/memories` consumes API-backed quality-gate contract and exposes queue priority-mode selection without changing label vocabulary or submit flow semantics. +- Phase 6 Sprint 22 adds retrieval-quality evaluation and recall ranking calibration seams: + - `GET /v0/continuity/retrieval-evaluation` returns deterministic fixture-backed precision summaries (`precision_at_k_mean`, `precision_at_1_mean`, `precision_target`) and top-result ordering evidence for each fixture query. + - `GET /v0/continuity/recall` ranking is calibrated for confirmation, freshness posture, provenance quality, and supersession posture while preserving deterministic ordering. + - continuity recall ordering metadata now surfaces explicit ranking evidence fields (`freshness_posture`, `provenance_posture`, `supersession_posture`) and their deterministic rank contributions. + - `/continuity` recall cards render ranking posture evidence (`freshness`, `provenance`, `supersession`) from API ordering metadata. +- Phase 6 Sprint 23 correction/freshness hygiene implementation is shipped baseline and adds deterministic weekly review evidence fields: + - `correction_recurrence_count` (open-loop objects with recurring correction events) + - `freshness_drift_count` (open-loop items currently in stale posture) +- Phase 6 Sprint 24 trust/evidence implementation is shipped baseline and adds deterministic trust dashboard + release evidence seams: + - `GET /v0/memories/trust-dashboard` returns one canonical quality posture payload combining quality-gate posture, queue posture/aging summary, retrieval-quality summary, correction recurrence/freshness drift summary, and explicit recommended next review action. 
+ - `python3 scripts/run_phase6_quality_evidence.py` writes deterministic quality evidence artifact output for release/readiness paths. + - `python3 scripts/run_phase4_readiness_gates.py`, `python3 scripts/run_phase4_release_candidate.py`, and `python3 scripts/run_phase4_validation_matrix.py` now include additive quality evidence summary reporting while preserving existing GO/NO_GO semantics. +- `apps/web` is also a shipped surface now. The operator shell includes `/`, `/chat`, `/approvals`, `/tasks`, `/artifacts`, `/gmail`, `/calendar`, `/memories`, `/chief-of-staff`, `/entities`, and `/traces`, with live reads when API config is present and explicit fixture fallback when it is not. +- `/chief-of-staff` now ships P7-S27 preparation and resumption supervision on top of P7-S25/P7-S26: deterministic ranked priorities, deterministic follow-through supervision, deterministic preparation artifacts, trust-aware confidence posture (including explicit low-trust downgrade rendering), deterministic recommended next action, deterministic escalation posture, and draft-only follow-up artifact visibility. +- `/chat` now ships assistant-response mode, governed-request mode, visible thread selection, compact thread creation, selected-thread transcript continuity, deterministic resumption brief review, thread-linked governed workflow review, ordered task-step timeline review, bounded explain-why trace embedding, manual explicit-signal capture controls for selected `message.user` events, and bounded supporting continuity review over thread sessions and events. 
+- `/continuity` now ships the Phase 5 Sprint 17 + Sprint 18 + Sprint 19 + Sprint 20 continuity workspace: + - capture submit (optional explicit signal), recent capture list, and capture detail with posture/provenance + - recall query/results panel with scoped filters and provenance-backed result cards + - deterministic resumption-brief panel with always-present required sections + - review queue list/filter panel + - selected-object correction form with correction-event history and supersession chain visibility + - open-loop dashboard grouped by waiting-for/blocker/stale/next-action posture + - daily brief panel and weekly review panel with explicit empty-state rendering + - open-loop review-action controls (`done`, `deferred`, `still_blocked`) with immediate page refresh feedback +- `/gmail` ships a bounded Gmail operator workspace: account list review, selected-account detail, explicit account connection, and explicit single-message ingestion into one selected task workspace. +- `/calendar` ships a bounded Calendar operator workspace: account list review, selected-account detail, explicit account connection, and explicit single-event ingestion into one selected task workspace. The shipped API baseline now also includes bounded read-only event discovery for one selected account (`GET /v0/calendar-accounts/{calendar_account_id}/events`) with deterministic ordering metadata and bounded limits. +- `/memories` ships a bounded memory review workspace: active/queue list posture, selected memory detail, revision review, and memory-label review/submit seams with explicit live/fixture/unavailable states. +- `/entities` ships a bounded entity review workspace: list, selected entity detail, and related edge review with explicit live/fixture/unavailable states. +- `/artifacts` ships a bounded artifact review workspace: list, selected artifact detail, linked task-workspace summary, and ordered chunk evidence with explicit live/fixture/unavailable states. 
+- `workers` remains scaffold-only. + +## Current Boundaries + +- Continuity stays explicit and thread-scoped: thread create/list/detail plus session/event review and deterministic thread resumption-brief reads are live; thread rename, archive, search, pagination, and event mutation are not. +- Assistant replies go only through `POST /v0/responses`, persist immutable continuity events, and return linked compile and response traces. +- Explain-why in `/chat` is selected-thread scoped and bounded: it reuses shipped trace list/detail/event reads, shows linked trace shortcuts from transcript/workflow/timeline context, and keeps full trace workspace in `/traces`. +- Governed actions still route through policy, allowlist, approval, and approved-only proxy execution; `proxy.echo` is still the only live execution handler. +- Task workspaces and artifacts remain rooted local boundaries. Ingestion remains narrow to plain text, markdown, narrow PDF text, narrow DOCX text from `word/document.xml`, and narrow RFC822 extraction. +- Gmail and Calendar remain read-only connector surfaces. Calendar now includes bounded event discovery for one selected account plus selected-event ingestion. Secret material stays behind dedicated secret-manager seams, the Gmail `legacy_db_v0` transition path still exists for older credential rows, and the shipped web workspaces stay bounded to account review, explicit connect, and one-item ingestion into one selected task workspace. + +## Active Risks + +- Memory extraction and retrieval quality remain the main product risk. +- Auth is still incomplete beyond database user context. +- Connector breadth, richer parsing, and orchestration are still deferred; docs must stay synchronized with the shipped API-plus-web baseline, including `/gmail` and `/calendar`, so planning does not drift again. +- Phase 5 remaining scope: + - none from Sprint 17-20 continuity plan. 
+- Post-Phase-5 active scope: + - P6-S21, P6-S22, P6-S23, and P6-S24 are shipped baseline. + - P7-S25 priority engine and chief-of-staff dashboard is shipped baseline. + - P7-S26 follow-through supervision is shipped baseline. + - P7-S27 preparation briefs and resumption supervision is shipped baseline. + - P7-S28 weekly review and outcome learning is shipped baseline. + - P8-S29 chief-of-staff action handoff artifacts are shipped baseline. + - P8-S30 chief-of-staff handoff queue and operational review is shipped baseline. + - P8-S31 chief-of-staff governed execution routing is shipped baseline. + - active post-P7 scope is P8-S32 outcome learning and closure quality. + +## Repo Evidence To Trust + +- Backend continuity and response seams: `tests/integration/test_continuity_api.py`, `tests/integration/test_continuity_store.py`, `tests/integration/test_responses_api.py` +- Backend Gmail and Calendar seams: `tests/integration/test_gmail_accounts_api.py`, `tests/integration/test_calendar_accounts_api.py`, `tests/unit/test_gmail.py`, `tests/unit/test_calendar.py`, `tests/unit/test_calendar_main.py`, `tests/unit/test_20260316_0026_gmail_accounts.py`, `tests/unit/test_20260319_0030_calendar_accounts_and_credentials.py` +- Web `/chat` continuity + workflow/timeline/explainability adoption: `apps/web/app/chat/page.tsx`, `apps/web/app/chat/page.test.tsx`, `apps/web/components/thread-list.tsx`, `apps/web/components/thread-summary.tsx`, `apps/web/components/thread-event-list.tsx`, `apps/web/components/response-composer.tsx`, `apps/web/components/thread-workflow-panel.tsx`, `apps/web/components/task-step-list.tsx`, `apps/web/components/response-history.tsx`, `apps/web/components/thread-trace-panel.tsx`, and matching component tests. 
+- Web review workspaces added through the accepted Sprint 6P/6Q/6R sequence: `apps/web/app/memories/page.tsx`, `apps/web/app/memories/page.test.tsx`, `apps/web/app/entities/page.tsx`, `apps/web/app/entities/page.test.tsx`, `apps/web/app/artifacts/page.tsx`, `apps/web/app/artifacts/page.test.tsx`. +- Web Gmail and Calendar workspaces: `apps/web/app/gmail/page.tsx`, `apps/web/app/calendar/page.tsx`, `apps/web/lib/api.ts`, `apps/web/lib/api.test.ts`, `apps/web/components/gmail-account-list.test.tsx`, `apps/web/components/calendar-account-list.test.tsx`, `apps/web/components/calendar-event-ingest-form.test.tsx` +- Web continuity workspace + retrieval/resumption/review/open-loop briefing surfaces: `apps/web/app/continuity/page.tsx`, `apps/web/app/continuity/page.test.tsx`, `apps/web/components/continuity-recall-panel.tsx`, `apps/web/components/continuity-recall-panel.test.tsx`, `apps/web/components/resumption-brief.tsx`, `apps/web/components/resumption-brief.test.tsx`, `apps/web/components/continuity-review-queue.tsx`, `apps/web/components/continuity-review-queue.test.tsx`, `apps/web/components/continuity-correction-form.tsx`, `apps/web/components/continuity-correction-form.test.tsx`, `apps/web/components/continuity-open-loops-panel.tsx`, `apps/web/components/continuity-open-loops-panel.test.tsx`, `apps/web/components/continuity-daily-brief.tsx`, `apps/web/components/continuity-daily-brief.test.tsx`, `apps/web/components/continuity-weekly-review.tsx`, `apps/web/components/continuity-weekly-review.test.tsx` +- Shell route inventory and discoverability: `apps/web/components/app-shell.tsx`, `apps/web/app/page.tsx` + +## Planning Guardrails + +- Plan from the implemented Phase 3 Sprint 9 repo state, not from older Sprint 5-era narratives. +- Do not describe broader Gmail scope, broader Calendar scope beyond bounded read-only event discovery plus selected-event ingestion, richer parsing, broader proxy execution, auth expansion, or runner orchestration as shipped. 
+- The immediate next move should be chosen from the current shipped backend-plus-web-shell baseline, including `/gmail`, `/calendar`, `/memories`, `/entities`, and `/artifacts`, not assumed to be leftover connector cleanup by default. diff --git a/.ai/archive/planning/2026-04-07-phase9-bootstrap/SPRINT_PACKET.md b/.ai/archive/planning/2026-04-07-phase9-bootstrap/SPRINT_PACKET.md new file mode 100644 index 0000000..b384e6c --- /dev/null +++ b/.ai/archive/planning/2026-04-07-phase9-bootstrap/SPRINT_PACKET.md @@ -0,0 +1,232 @@ +# SPRINT_PACKET.md + +## Sprint Title + +Phase 8 Sprint 32 (P8-S32): Outcome Learning and Closure Quality + +## Sprint Type + +feature + +## Sprint Reason + +P8-S31 shipped deterministic governed execution routing on top of P8-S29/P8-S30. The next non-redundant seam is closing the Phase 8 loop by capturing handoff outcomes, exposing closure quality signals, and feeding deterministic learning signals back into chief-of-staff supervision. + +Planning anchors: + +- `docs/phase8-product-spec.md` +- `docs/phase8-sprint-29-32-plan.md` + +## Sprint Intent + +Ship deterministic outcome-learning seams on top of shipped P8-S29/P8-S30/P8-S31: + +- explicit handoff outcome capture/status semantics +- closure quality and conversion signal summaries +- stale/ignored escalation posture visibility +- `/chief-of-staff` outcome-learning and closure panel + +## Git Instructions + +- Branch Name: `codex/phase8-sprint-32-outcome-learning-closure-quality` +- Base Branch: `main` +- PR Strategy: one sprint branch, one PR +- Merge Policy: squash merge only after reviewer `PASS` and explicit Control Tower merge approval + +## Why This Sprint + +- It is the planned final Phase 8 seam after shipped routing capability. +- It turns routed handoffs into measurable closure outcomes. +- It completes recommendation-to-handoff-to-routing-to-learning feedback without widening autonomy. 
+ +## Redundancy Guard + +- Already shipped baseline: + - Phase 4 release-control and MVP sign-off seams. + - Phase 5 continuity capture/recall/review/open-loop seams. + - Phase 6 trust calibration (`P6-S21` through `P6-S24`), complete as of March 31, 2026. + - Phase 7 chief-of-staff layer complete (`P7-S25` through `P7-S28`). + - Phase 8 Sprint 29 (`P8-S29`) action handoff artifacts and explicit non-autonomous posture. + - Phase 8 Sprint 30 (`P8-S30`) handoff queue lifecycle and operator review transitions. + - Phase 8 Sprint 31 (`P8-S31`) governed execution routing transitions and readiness posture. +- Required now (P8-S32): + - deterministic handoff outcome capture semantics + - closure quality summary and recommendation-to-execution conversion signals + - explicit stale/ignored escalation posture and feedback visibility +- Explicitly out of P8-S32: + - autonomous execution or external connector side effects + - redesign of P8-S29 handoff generation semantics + - redesign of P8-S30 queue/review lifecycle semantics + - redesign of P8-S31 routing semantics + - connector/channel/auth/orchestration expansion + +## Design Truth + +- Outcome learning must be deterministic for fixed state. +- Outcome capture must be explicit and auditable. +- Closure quality signals must be visible and explainable. +- Execution posture remains approval-bounded and non-autonomous. 
+ +## Exact Surfaces In Scope + +- chief-of-staff handoff outcome-learning artifact/API seam +- closure quality and conversion summary seam +- stale/ignored escalation signal seam +- `/chief-of-staff` outcome-learning panel +- deterministic tests for outcome capture and learning rollups + +## Exact Files In Scope + +- `apps/api/src/alicebot_api/chief_of_staff.py` +- `apps/api/src/alicebot_api/contracts.py` +- `apps/api/src/alicebot_api/main.py` +- `apps/api/src/alicebot_api/tasks.py` +- `apps/api/src/alicebot_api/approvals.py` +- `apps/api/src/alicebot_api/memory.py` +- `apps/web/lib/api.ts` +- `apps/web/lib/api.test.ts` +- `apps/web/app/chief-of-staff/page.tsx` +- `apps/web/app/chief-of-staff/page.test.tsx` +- `apps/web/components/chief-of-staff-execution-routing-panel.tsx` +- `apps/web/components/chief-of-staff-execution-routing-panel.test.tsx` +- `apps/web/components/chief-of-staff-outcome-learning-panel.tsx` +- `apps/web/components/chief-of-staff-outcome-learning-panel.test.tsx` +- `tests/unit/test_chief_of_staff.py` +- `tests/integration/test_chief_of_staff_api.py` +- `README.md` +- `ROADMAP.md` +- `.ai/handoff/CURRENT_STATE.md` +- `BUILD_REPORT.md` +- `REVIEW_REPORT.md` +- `.ai/active/SPRINT_PACKET.md` + +## In Scope + +- Add deterministic outcome-learning fields on chief-of-staff payloads: + - `handoff_outcome_summary` + - `handoff_outcomes` + - `closure_quality_summary` + - `conversion_signal_summary` + - `stale_ignored_escalation_posture` +- Add explicit outcome-capture seam for routed handoff items: + - statuses: `reviewed`, `approved`, `rejected`, `rewritten`, `executed`, `ignored`, `expired` + - deterministic capture ordering and latest-state derivation + - immutable outcome capture records +- Add deterministic closure-learning behavior: + - recommendation-to-execution conversion signals + - stale/ignored escalation rollups + - explicit explanation payload for how outcome history affects guidance posture +- Add `/chief-of-staff` outcome-learning panel 
with outcome capture controls and closure metrics visibility. +- Add deterministic tests for outcome capture/status rollups and closure-learning summary behavior. + +## Out of Scope + +- automatic execution based on outcomes +- connector/channel expansion +- changes to shipped P8-S29 generation semantics +- changes to shipped P8-S30 queue/review semantics +- changes to shipped P8-S31 routing semantics + +## Required Deliverables + +- outcome-learning API/artifact seam +- deterministic outcome capture and closure rollup behavior +- stale/ignored escalation posture visibility +- `/chief-of-staff` outcome-learning UI panel +- unit/integration/web tests for outcome-learning behavior +- synced docs and sprint reports + +## Acceptance Criteria + +- routed handoff outcomes are captured with deterministic, explicit status semantics. +- closure quality and conversion signals are deterministic, auditable, and explainable. +- stale/ignored escalation posture is explicit and visible. +- approval-bounded non-autonomous posture remains preserved. +- `./.venv/bin/python -m pytest tests/unit/test_chief_of_staff.py tests/integration/test_chief_of_staff_api.py -q` passes. +- `pnpm --dir apps/web test -- app/chief-of-staff/page.test.tsx components/chief-of-staff-execution-routing-panel.test.tsx components/chief-of-staff-outcome-learning-panel.test.tsx lib/api.test.ts` passes. +- `python3 scripts/run_phase4_validation_matrix.py` remains PASS. +- `README.md`, `ROADMAP.md`, and `.ai/handoff/CURRENT_STATE.md` reflect active P8-S32 scope and preserve “P8-S29/P8-S30/P8-S31 shipped” truth. 
+ +## Implementation Constraints + +- do not introduce new dependencies +- preserve shipped P5/P6/P7 semantics +- preserve shipped P8-S29 handoff-generation semantics +- preserve shipped P8-S30 queue/review semantics +- preserve shipped P8-S31 routing semantics +- keep side effects approval-bounded and explicit +- keep outcome-learning behavior deterministic and test-backed + +## Control Tower Task Cards + +### Task 1: Outcome Learning Engine + API + +Owner: tooling operative + +Write scope: + +- `apps/api/src/alicebot_api/chief_of_staff.py` +- `apps/api/src/alicebot_api/contracts.py` +- `apps/api/src/alicebot_api/main.py` +- `apps/api/src/alicebot_api/tasks.py` +- `apps/api/src/alicebot_api/approvals.py` +- `apps/api/src/alicebot_api/memory.py` +- `tests/unit/test_chief_of_staff.py` +- `tests/integration/test_chief_of_staff_api.py` + +### Task 2: Chief-of-Staff Outcome UI + +Owner: tooling operative + +Write scope: + +- `apps/web/lib/api.ts` +- `apps/web/lib/api.test.ts` +- `apps/web/app/chief-of-staff/page.tsx` +- `apps/web/app/chief-of-staff/page.test.tsx` +- `apps/web/components/chief-of-staff-execution-routing-panel.tsx` +- `apps/web/components/chief-of-staff-execution-routing-panel.test.tsx` +- `apps/web/components/chief-of-staff-outcome-learning-panel.tsx` +- `apps/web/components/chief-of-staff-outcome-learning-panel.test.tsx` + +### Task 3: Docs + Integration Review + +Owner: control tower + +Write scope: + +- `README.md` +- `ROADMAP.md` +- `.ai/handoff/CURRENT_STATE.md` +- `BUILD_REPORT.md` +- `REVIEW_REPORT.md` + +Responsibilities: + +- verify no P6/P7/P8-S29/P8-S30/P8-S31 relitigation +- verify deterministic outcome capture and closure-learning semantics +- verify no hidden scope expansion +- verify no Phase 4 regression + +## Build Report Requirements + +`BUILD_REPORT.md` must include: + +- exact outcome-learning contract delta +- exact deterministic outcome/closure summary behavior +- exact verification command outcomes +- explicit deferred Phase 8 
follow-up scope after P8-S32 + +## Review Focus + +`REVIEW_REPORT.md` should verify: + +- sprint stayed P8-S32 scoped +- outcome-learning outputs are deterministic and explainable +- approval-bounded execution posture is explicit and preserved +- no hidden scope expansion +- Phase 4 validation remains green + +## Exit Condition + +This sprint is complete when Alice can deterministically capture handoff outcomes and present closure-quality learning signals that feed back into chief-of-staff supervision, without regressing shipped Phase 4/5/6/7 and P8-S29/P8-S30/P8-S31 behavior. diff --git a/.ai/handoff/CURRENT_STATE.md b/.ai/handoff/CURRENT_STATE.md index 5d7bbd7..3104fe1 100644 --- a/.ai/handoff/CURRENT_STATE.md +++ b/.ai/handoff/CURRENT_STATE.md @@ -20,8 +20,8 @@ ## Incomplete / At-Risk Areas -- no external adapter/importer interop has shipped yet beyond local MCP -- importer and adapter story is not yet public-ready +- importer coverage is still limited to one shipped adapter path (OpenClaw) +- broader importer story is not yet public-ready - OSS license finalization is still open ## Current Milestone @@ -30,7 +30,7 @@ Phase 9: Alice Public Core and Agent Interop ## Latest State Summary -`P9-S33`, `P9-S34`, and `P9-S35` are now shipped baselines: +`P9-S33`, `P9-S34`, `P9-S35`, and `P9-S36` are now shipped baselines: - package boundary is documented around `alice-core` - canonical local startup path is documented and script-backed @@ -54,12 +54,22 @@ Phase 9: Alice Public Core and Agent Interop - successful `alice_recall` and `alice_resume` calls - correction via `alice_memory_correct` changing subsequent retrieval deterministically - structured parity against shipped CLI/core behavior - -The next active seam is `P9-S36`: - -- implement OpenClaw adapter boundary on top of shipped CLI/MCP continuity contract +- OpenClaw adapter/import path exists for file-based workspace/export input: + - adapter modules: `openclaw_adapter.py`, `openclaw_models.py`, 
`openclaw_import.py` + - loader scripts: `./scripts/load_openclaw_sample_data.sh` and `load_openclaw_sample_data.py` + - deterministic fixture path: `fixtures/openclaw/workspace_v1.json` + - deterministic dedupe posture: workspace+payload fingerprint (repeat import returns noop duplicates) + - imported provenance is explicit via `source_kind=openclaw_import` and OpenClaw source metadata +- OpenClaw interop proof is covered by tests for: + - import -> recall/resumption behavior on imported scope + - shipped MCP `alice_recall`/`alice_resume` usage over imported data without MCP surface expansion +- ADR-004 defines the accepted OpenClaw integration boundary and scope constraints. + +The next active seam is `P9-S37`: + +- expand from single-adapter proof to broader importer coverage and evaluation harness work - keep parity strict with existing deterministic continuity semantics -- avoid widening MCP transport semantics while adapter boundary is established +- avoid widening MCP transport semantics unless parity defects are found ## Critical Constraints @@ -70,11 +80,11 @@ The next active seam is `P9-S36`: ## Immediate Next Move -Execute `P9-S36` on top of the `P9-S33`/`P9-S34`/`P9-S35` boundary: +Execute `P9-S37` on top of the shipped `P9-S36` boundary: -- build the first OpenClaw adapter using the shipped MCP wedge -- preserve deterministic continuity output semantics and correction parity -- keep startup/sample-data path unchanged while adapter support is added +- broaden importer coverage beyond OpenClaw using the same explicit provenance posture +- add benchmark/evaluation harness evidence for import quality and correction-aware continuity outcomes +- preserve startup/sample-data path and avoid MCP contract expansion unless needed for parity fixes ## Legacy Compatibility Markers diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 0cd1fe4..fa9edf0 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -13,7 +13,12 @@ The current implementation already includes: 
Phase 9 does not replace that architecture. It packages and exposes it through public-safe boundaries. -Current public-core packaging state is defined in `P9-S33`: one install path, one runtime baseline, and one deterministic sample-data fixture for recall and resumption verification. +Current public packaging baseline now spans `P9-S33` through `P9-S36`: + +- `P9-S33`: public-safe core boundary and canonical local startup +- `P9-S34`: deterministic local CLI continuity contract +- `P9-S35`: narrow deterministic MCP transport contract +- `P9-S36`: first OpenClaw adapter/import boundary with provenance + dedupe posture ## Technical Stack @@ -23,13 +28,14 @@ Current public-core packaging state is defined in `P9-S33`: one install path, on - Vector support: `pgvector` - Local infrastructure: Docker Compose, Redis, MinIO - Testing: pytest, Vitest -- Packaging target for `P9-S33`: +- Shipped packaging/runtime baseline (`P9-S33` to `P9-S36`): - `alice-core` (published package name in `pyproject.toml`) - deterministic fixture loader (`scripts/load_sample_data.sh`) -- Deferred packaging targets (`P9-S34+`): - - `apps/cli` - - `apps/mcp-server` - - importer/adapter packages + - deterministic local CLI contract (`python -m alicebot_api ...`) + - deterministic local MCP transport (`python -m alicebot_api.mcp_server`) + - OpenClaw import loader and fixture path (`scripts/load_openclaw_sample_data.sh`) +- Deferred/next packaging targets (`P9-S37+`): + - broader importer set and evaluation harness outputs ## High-Level Architecture @@ -50,13 +56,13 @@ Current public-core packaging state is defined in `P9-S33`: one install path, on - resumption - open-loop retrieval - correction-aware memory behavior -2. `alice-cli` (deferred) +2. `alice-cli` (shipped baseline in `P9-S34`, module-scoped runtime) - terminal access to public core flows -3. `alice-mcp-server` (deferred) +3. 
`alice-mcp-server` (shipped baseline in `P9-S35`, narrow tool contract) - stable MCP tool surface for external assistants -4. `alice-importers` (deferred) +4. `alice-importers` (next seam in `P9-S37`) - markdown, chat export, CSV, and adapter-backed imports -5. `alice-openclaw` (deferred) +5. `alice-openclaw` (shipped baseline in `P9-S36`, adapter-scoped import path) - OpenClaw-specific ingestion and interop mapping ## Module Boundaries diff --git a/BUILD_REPORT.md b/BUILD_REPORT.md index 3798307..22b45e2 100644 --- a/BUILD_REPORT.md +++ b/BUILD_REPORT.md @@ -1,106 +1,109 @@ # BUILD_REPORT.md ## sprint objective -Ship `P9-S35` by adding a deterministic local MCP server that exposes the ADR-003 continuity tool surface (`alice_capture`, `alice_recall`, `alice_resume`, `alice_open_loops`, `alice_recent_decisions`, `alice_recent_changes`, `alice_memory_review`, `alice_memory_correct`, `alice_context_pack`) over the shipped `alice-core` runtime without changing `P9-S33` startup flow or `P9-S34` semantics. +Ship `P9-S36` by adding the first OpenClaw adapter/import path so OpenClaw workspace or durable-memory data can be imported into Alice continuity objects with explicit provenance and deterministic dedupe, then queried through shipped recall/resumption semantics and optionally through the shipped MCP tool surface without widening MCP contracts. 
## completed work -- Added runnable MCP transport entrypoint: - - `apps/api/src/alicebot_api/mcp_server.py` - - stdio JSON-RPC loop with deterministic framing - - supported methods: `initialize`, `ping`, `tools/list`, `tools/call` -- Added deterministic MCP tool layer: - - `apps/api/src/alicebot_api/mcp_tools.py` - - static deterministic tool schemas (ADR-003 tool names in fixed order) - - direct mapping to shipped continuity seams (capture/recall/resume/open-loops/review/correction) - - deterministic structured serialization and narrow error envelopes -- Added package script entrypoint: - - `pyproject.toml`: `alicebot-mcp = "alicebot_api.mcp_server:main"` -- Added MCP unit and integration verification: - - `tests/unit/test_mcp.py` - - `tests/integration/test_mcp_server.py` - - `tests/integration/test_mcp_cli_parity.py` -- Added interoperability evidence for a real MCP client path (stdio JSON-RPC client subprocess): - - successful `alice_recall` - - successful `alice_resume` - - successful `alice_memory_correct` (`supersede`) with deterministic change in later recall/resume result -- Captured required acceptance evidence details: - - exact MCP startup path used: `./.venv/bin/python -m alicebot_api.mcp_server` - - exact local client/config used for proof: - - client type: stdio JSON-RPC MCP client subprocess - - transport command: `python -m alicebot_api.mcp_server` - - env: `DATABASE_URL=postgresql://alicebot_app:alicebot_app@localhost:5432/alicebot` - - env: `ALICEBOT_AUTH_USER_ID=00000000-0000-0000-0000-000000000001` - - intentionally deferred concern: no hosted/remote auth layer (local process + local user scope only) -- Updated sprint-scoped docs: - - `README.md` with exact MCP startup path and compatible local client config example - - `ROADMAP.md` marking `P9-S35` shipped baseline - - `.ai/handoff/CURRENT_STATE.md` with MCP shipped baseline and `P9-S36` next seam +- Implemented OpenClaw adapter boundary and input normalization: + - 
`apps/api/src/alicebot_api/openclaw_models.py` + - `apps/api/src/alicebot_api/openclaw_adapter.py` + - Supported source contract: + - JSON file with `durable_memory` / `memories` / `items` / `records` + - workspace directory with known JSON files (`workspace.json`, `openclaw_workspace.json`, `durable_memory.json`, `memories.json`, `openclaw_memories.json`) +- Implemented OpenClaw import-to-continuity mapping: + - `apps/api/src/alicebot_api/openclaw_import.py` + - deterministic mapping into shipped continuity object types (`Decision`, `NextAction`, `WaitingFor`, `Commitment`, etc.) + - explicit provenance tagging on imported material (`source_kind=openclaw_import`, workspace/source metadata) + - deterministic dedupe posture via stable workspace+payload fingerprint (`openclaw_dedupe_key`) +- Hardened importer lifecycle-status handling: + - unknown external `status` values are rejected with explicit validation errors + - importer no longer silently coerces unknown statuses to `active` +- Added reproducible fixture and local import path: + - `fixtures/openclaw/workspace_v1.json` + - `scripts/load_openclaw_sample_data.py` + - `scripts/load_openclaw_sample_data.sh` +- Added verification coverage for adapter/import/interop: + - `tests/unit/test_openclaw_adapter.py` + - `tests/integration/test_openclaw_import.py` + - `tests/integration/test_openclaw_mcp_integration.py` +- Added adapter boundary ADR: + - `docs/adr/ADR-004-openclaw-integration-boundary.md` +- Synced sprint-scoped docs: + - `README.md` + - `ROADMAP.md` + - `.ai/handoff/CURRENT_STATE.md` + - `ARCHITECTURE.md` + - `RULES.md` + - `.ai/active/SPRINT_PACKET.md` (scope hygiene annotation for archived planning artifacts) ## incomplete work -- None inside `P9-S35` scope. +- None inside `P9-S36` scope. 
- Intentionally deferred (out of scope): - - OpenClaw adapter implementation (`P9-S36`) - - importer expansion - - hosted/remote auth systems - - MCP ergonomics beyond the initial narrow wedge (pagination/advanced discovery ergonomics) + - generic multi-source importer framework + - MCP tool-surface expansion + - hosted adapter/auth/service work ## files changed -- `.ai/active/SPRINT_PACKET.md` -- `apps/api/src/alicebot_api/mcp_server.py` -- `apps/api/src/alicebot_api/mcp_tools.py` -- `pyproject.toml` -- `tests/unit/test_mcp.py` -- `tests/integration/test_mcp_server.py` -- `tests/integration/test_mcp_cli_parity.py` +- `apps/api/src/alicebot_api/openclaw_models.py` +- `apps/api/src/alicebot_api/openclaw_adapter.py` +- `apps/api/src/alicebot_api/openclaw_import.py` +- `scripts/load_openclaw_sample_data.py` +- `scripts/load_openclaw_sample_data.sh` +- `fixtures/openclaw/workspace_v1.json` +- `tests/unit/test_openclaw_adapter.py` +- `tests/integration/test_openclaw_import.py` +- `tests/integration/test_openclaw_mcp_integration.py` +- `docs/adr/ADR-004-openclaw-integration-boundary.md` +- `.ai/archive/planning/2026-04-07-phase9-bootstrap/SPRINT_PACKET.md` +- `.ai/archive/planning/2026-04-07-phase9-bootstrap/CURRENT_STATE.md` +- `docs/archive/planning/2026-04-07-phase9-bootstrap/README.md` +- `docs/archive/planning/2026-04-07-phase9-bootstrap/ROADMAP.md` +- `docs/archive/planning/2026-04-07-phase9-bootstrap/PRODUCT_BRIEF.md` +- `docs/archive/planning/2026-04-07-phase9-bootstrap/ARCHITECTURE.md` +- `docs/archive/planning/2026-04-07-phase9-bootstrap/RULES.md` +- `ARCHITECTURE.md` +- `RULES.md` - `README.md` - `ROADMAP.md` +- `docs/phase9-sprint-33-38-plan.md` - `.ai/handoff/CURRENT_STATE.md` +- `.ai/active/SPRINT_PACKET.md` - `BUILD_REPORT.md` - `REVIEW_REPORT.md` -## local artifacts explicitly excluded from sprint merge scope -- `.ai/archive/` (local archive workspace artifacts) -- `docs/archive/planning/` (local planning archive artifacts) - ## tests run - `docker 
compose up -d` - PASS - `./scripts/migrate.sh` - - PASS (required elevated local DB access) + - PASS - `./scripts/load_sample_data.sh` - - PASS (`status=noop`, fixture already loaded) + - PASS (`status=noop`, already loaded) +- `./scripts/load_openclaw_sample_data.sh --source fixtures/openclaw/workspace_v1.json` + - PASS (`status=ok`, `imported_count=4`, `skipped_duplicates=1`) +- `./scripts/load_openclaw_sample_data.sh --source fixtures/openclaw/workspace_v1.json` + - PASS (`status=noop`, `imported_count=0`, `skipped_duplicates=5`) - `APP_RELOAD=false ./scripts/api_dev.sh` - - PASS (server started on `http://127.0.0.1:8000`) + - PASS (started on `http://127.0.0.1:8000`) - `curl -sS http://127.0.0.1:8000/healthz` - PASS (`status":"ok"`) -- `./.venv/bin/python -m alicebot_api --help` - - PASS -- `./.venv/bin/python -m alicebot_api.mcp_server --help` - - PASS -- `./.venv/bin/python -m pytest tests/unit/test_mcp.py -q` +- `./.venv/bin/python -m alicebot_api recall --thread-id cccccccc-cccc-4ccc-8ccc-cccccccccccc --project "Alice Public Core" --query "MCP tool surface" --limit 5` + - PASS (returned imported OpenClaw `Decision` with `source_kind=openclaw_import` provenance references) +- `./.venv/bin/python -m alicebot_api resume --thread-id cccccccc-cccc-4ccc-8ccc-cccccccccccc --max-recent-changes 5 --max-open-loops 5` + - PASS (`last_decision`, `next_action`, and `recent_changes` include imported OpenClaw data) +- `./.venv/bin/python -m pytest tests/unit/test_openclaw_adapter.py -q` - PASS (`5 passed`) -- `./.venv/bin/python -m pytest tests/integration/test_mcp_server.py -q` - - PASS (`1 passed`) -- `./.venv/bin/python -m pytest tests/integration/test_mcp_cli_parity.py -q` - - PASS (`1 passed`) -- `./.venv/bin/python -m pytest tests/unit/test_mcp.py tests/integration/test_mcp_server.py tests/integration/test_mcp_cli_parity.py -q` +- `./.venv/bin/python -m pytest tests/integration/test_openclaw_import.py tests/integration/test_openclaw_mcp_integration.py -q` + - PASS 
(`2 passed`) +- `./.venv/bin/python -m pytest tests/unit/test_openclaw_adapter.py tests/integration/test_openclaw_import.py tests/integration/test_openclaw_mcp_integration.py -q` - PASS (`7 passed`) - `./.venv/bin/python -m pytest tests/unit tests/integration` - - PASS (`961 passed in 97.32s`) (required elevated local DB access) + - PASS (`968 passed in 90.94s`) - `pnpm --dir apps/web test` - PASS (`57 files, 192 tests`) -- MCP smoke client against new entrypoint (`python -m alicebot_api.mcp_server`) - - PASS - - initialize protocol: `2024-11-05` - - `alice_recall`: `isError=false`, returned count `3` - - `alice_resume`: `isError=false`, last decision present ## blockers/issues -- Sandbox restrictions required elevated execution for localhost Postgres connections and binding to localhost `:8000`. -- Initial MCP integration test failure due missing `if __name__ == "__main__":` in `mcp_server.py`; fixed by adding module entry invocation. -- Remaining non-sprint workspace artifacts are limited to untracked local archive directories: - - `.ai/archive/` - - `docs/archive/planning/` +- Sandbox restrictions required elevated execution for localhost Postgres/API verification commands. +- No remaining functional blockers in sprint scope. ## recommended next step -Start `P9-S36` by implementing the OpenClaw adapter against the now-stable MCP tool contract, keeping strict parity checks so adapter integration does not reopen continuity transport semantics. +Start `P9-S37` by generalizing importer coverage from the now-shipped OpenClaw boundary while preserving the same explicit provenance and dedupe posture, and adding benchmark/evaluation harness evidence for importer quality. diff --git a/README.md b/README.md index 84a86b7..1e03c4e 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ Alice is a local-first memory and continuity engine for AI agents. -`P9-S33` shipped the public-core baseline. 
`P9-S34` shipped the deterministic local CLI for continuity flows on top of that baseline. `P9-S35` now ships a narrow MCP transport for the same continuity contract.
+`P9-S33` shipped the public-core baseline. `P9-S34` shipped the deterministic local CLI for continuity flows on top of that baseline. `P9-S35` shipped a narrow MCP transport for the same continuity contract. `P9-S36` ships the first OpenClaw adapter/import path on top of those shipped surfaces.
 
 ## Canonical Local Startup Path (`P9-S33`)
 
@@ -88,6 +88,30 @@ Initial ADR-003 MCP tools:
 - `alice_memory_correct`
 - `alice_context_pack`
 
+## OpenClaw Adapter Path (`P9-S36`)
+
+`P9-S36` delivers the first OpenClaw adapter path:
+
+- import a sample or real OpenClaw workspace / durable-memory export
+- preserve import provenance and dedupe posture
+- prove Alice recall and resumption over imported OpenClaw material
+- optionally prove the shipped MCP tools work over that imported data
+
+Run the local OpenClaw sample import:
+
+```bash
+./scripts/load_openclaw_sample_data.sh --source fixtures/openclaw/workspace_v1.json
+```
+
+Sample proof commands against imported scope:
+
+```bash
+./.venv/bin/python -m alicebot_api recall --thread-id cccccccc-cccc-4ccc-8ccc-cccccccccccc --project "Alice Public Core" --query "MCP tool surface" --limit 5
+./.venv/bin/python -m alicebot_api resume --thread-id cccccccc-cccc-4ccc-8ccc-cccccccccccc --max-recent-changes 5 --max-open-loops 5
+```
+
+Dedupe posture is deterministic: re-running the same import returns `status=noop` with `skipped_duplicates=5`.
+ ### Compatible Client Example (Claude Desktop MCP) `claude_desktop_config.json` example: @@ -119,6 +143,7 @@ Initial ADR-003 MCP tools: - `apps/api`: FastAPI runtime and continuity core seams - `apps/web`: operator shell - `fixtures/public_sample_data`: deterministic public-core sample dataset +- `fixtures/openclaw`: deterministic OpenClaw adapter fixture dataset - `scripts`: startup, migration, and sample-data load scripts - `docs`: product, architecture, ADRs, and Phase 9 planning docs @@ -134,6 +159,7 @@ Initial ADR-003 MCP tools: - [docs/phase9-sprint-33-38-plan.md](docs/phase9-sprint-33-38-plan.md) - [docs/phase9-public-core-boundary.md](docs/phase9-public-core-boundary.md) - [docs/phase9-bootstrap-notes.md](docs/phase9-bootstrap-notes.md) +- [docs/adr/ADR-004-openclaw-integration-boundary.md](docs/adr/ADR-004-openclaw-integration-boundary.md) ## Legacy Compatibility Marker diff --git a/REVIEW_REPORT.md b/REVIEW_REPORT.md index b8d9138..3910c71 100644 --- a/REVIEW_REPORT.md +++ b/REVIEW_REPORT.md @@ -4,51 +4,41 @@ PASS ## criteria met -- Runnable MCP server entrypoint is implemented and callable from the documented local path: `python -m alicebot_api.mcp_server`, with console-script packaging via `alicebot-mcp`. -- ADR-003 initial MCP tool surface is implemented with deterministic schemas and fixed ordering: - - `alice_capture` - - `alice_recall` - - `alice_resume` - - `alice_open_loops` - - `alice_recent_decisions` - - `alice_recent_changes` - - `alice_memory_review` - - `alice_memory_correct` - - `alice_context_pack` -- One MCP-capable client path can call `alice_recall` successfully against the local runtime, verified in `tests/integration/test_mcp_server.py`. -- One MCP-capable client path can call `alice_resume` successfully against the local runtime, verified in `tests/integration/test_mcp_server.py`. 
-- Correction through `alice_memory_correct` changes later retrieval/resumption behavior deterministically, verified in `tests/integration/test_mcp_server.py`. -- MCP outputs remain narrow, deterministic, and provenance-backed through direct transport wrappers over shipped continuity seams in `apps/api/src/alicebot_api/mcp_tools.py`. -- Parity evidence exists between MCP and shipped CLI/core behavior in `tests/integration/test_mcp_cli_parity.py`. -- Sprint docs are aligned with the delivered MCP surface in `README.md`, `ROADMAP.md`, `.ai/handoff/CURRENT_STATE.md`, `BUILD_REPORT.md`, and this report. +- OpenClaw adapter/import boundary is implemented and runnable with fixture + loader scripts. +- Imported material is queryable through shipped recall semantics and contributes to shipped resumption output. +- Imported provenance remains explicit (`source_kind=openclaw_import`, `openclaw_*` metadata fields). +- Dedupe posture remains deterministic and idempotent (initial import + noop re-import behavior preserved). +- MCP augmentation proof remains within shipped tool contract (`alice_recall`, `alice_resume`). +- Status-handling fix landed: unknown external `status` values are now explicitly rejected instead of silently coerced to `active`. 
+- Scope/docs hygiene fixes landed: + - sprint packet scope now explicitly allows the archive snapshots that were added for traceability + - build report files-changed list now includes archive paths + - architecture and rules docs were updated to align with shipped `P9-S34/35/36` status and importer status-mapping discipline +- Verification rerun after fixes: + - `./.venv/bin/python -m pytest tests/unit/test_openclaw_adapter.py -q` -> `5 passed` + - `./.venv/bin/python -m pytest tests/integration/test_openclaw_import.py tests/integration/test_openclaw_mcp_integration.py -q` -> `2 passed` + - `./.venv/bin/python -m pytest tests/unit/test_openclaw_adapter.py tests/integration/test_openclaw_import.py tests/integration/test_openclaw_mcp_integration.py -q` -> `7 passed` + - `./.venv/bin/python -m pytest tests/unit tests/integration` -> `968 passed` + - `pnpm --dir apps/web test` -> `57 files, 192 tests` ## criteria missed - None. ## quality issues -- No blocking quality issues found in sprint scope. -- The sprint diff includes the MCP source and test files required by the packet, so the delivered feature matches the actual branch payload. +- No blocking quality issues found in sprint scope after fixes. ## regression risks - Low. -- Directly verified during review: - - `./.venv/bin/python -m alicebot_api.mcp_server --help` - - `./.venv/bin/python -m pytest tests/unit/test_mcp.py tests/integration/test_mcp_server.py tests/integration/test_mcp_cli_parity.py -q` - - `./.venv/bin/python -m pytest tests/unit tests/integration` - - `pnpm --dir apps/web test` -- Residual risk is future contract drift between MCP and CLI/core behavior if later seams widen payloads without preserving parity tests. +- Residual risk is primarily future importer expansion drift; current adapter path is protected by targeted and full-suite passing tests. ## docs issues -- No blocking docs issues in sprint scope. 
-- `README.md` includes the exact local MCP startup path, auth/config assumptions, and one compatible client configuration example. -- The only remaining non-sprint worktree paths are local archive directories explicitly excluded from merge scope (`.ai/archive/`, `docs/archive/planning/`). +- No blocking docs issues remain for this sprint. ## should anything be added to RULES.md? -- Optional hardening: require new runnable module entrypoints to include explicit `if __name__ == "__main__":` invocation whenever `python -m ...` is part of the documented startup path. +- Already addressed in this pass: importer rule added requiring unknown external lifecycle/status values to be explicitly mapped or rejected. ## should anything update ARCHITECTURE.md? -- No required update. The implementation stays within the existing Phase 9 architecture and ADR-003 by treating MCP as a transport wrapper over shipped continuity seams. +- Already addressed in this pass: Phase 9 packaging-state language now reflects shipped `P9-S34` CLI, `P9-S35` MCP transport, and `P9-S36` OpenClaw adapter baseline. ## recommended next action -1. Finalize the sprint PR from the current staged diff. -2. Keep `tests/integration/test_mcp_cli_parity.py` as a required guard while building `P9-S36` so adapter work does not reopen MCP transport semantics. +1. Proceed to `P9-S37` importer expansion, preserving the same provenance/dedupe discipline and explicit status-mapping posture. 
diff --git a/ROADMAP.md b/ROADMAP.md index 83f3ff4..519d3f4 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -44,12 +44,12 @@ Success condition: - local interop examples for compatible clients - deterministic tool contracts -### P9-S36: OpenClaw Adapter (next active seam) +### P9-S36: OpenClaw Adapter (shipped baseline) - import path for OpenClaw durable memory/workspace data - Alice MCP augmentation mode for OpenClaw-style workflows -### P9-S37: Importers and Evaluation Harness +### P9-S37: Importers and Evaluation Harness (current seam) - at least three production-usable importers - local benchmark and baseline report generation @@ -81,6 +81,7 @@ Success condition: - Phase 8 delivered operational chief-of-staff handoffs, routing, outcome learning, and closure quality. - `P9-S33` delivered the public-safe `alice-core` boundary, canonical local startup path, and deterministic sample-data proof. - `P9-S34` delivered the shipped local CLI continuity contract that `P9-S35` should mirror through MCP. +- `P9-S35` delivered the shipped local MCP contract that `P9-S36` should consume without widening. ## Legacy Compatibility Markers diff --git a/RULES.md b/RULES.md index 51beb11..2b0f29a 100644 --- a/RULES.md +++ b/RULES.md @@ -26,6 +26,7 @@ - Preserve append-only continuity, correction, and revision history. - Keep imported data provenance explicit. +- Importers must explicitly map or reject unknown external lifecycle/status values; do not silently coerce them to `active`. - Default memory admission to conservative behavior; do not loosen admission discipline for launch convenience. - Do not silently overwrite stale or superseded truth. 
diff --git a/apps/api/src/alicebot_api/openclaw_adapter.py b/apps/api/src/alicebot_api/openclaw_adapter.py new file mode 100644 index 0000000..bad1a9c --- /dev/null +++ b/apps/api/src/alicebot_api/openclaw_adapter.py @@ -0,0 +1,418 @@ +from __future__ import annotations + +from hashlib import sha256 +import json +from pathlib import Path + +from alicebot_api.openclaw_models import ( + OpenClawAdapterValidationError, + OpenClawNormalizedBatch, + OpenClawNormalizedItem, + OpenClawWorkspaceContext, + as_json_object, + canonical_json_string, + ensure_json_object, + merge_json_objects, + normalize_optional_text, + parse_optional_confidence, + parse_optional_status, + pick_first_text, + to_string_list, +) +from alicebot_api.store import JsonObject + + +_OPENCLAW_TYPE_TO_OBJECT_TYPE: dict[str, str] = { + "decision": "Decision", + "decisions": "Decision", + "task": "NextAction", + "next": "NextAction", + "next_action": "NextAction", + "nextaction": "NextAction", + "action": "NextAction", + "commitment": "Commitment", + "waiting": "WaitingFor", + "waiting_for": "WaitingFor", + "waitingfor": "WaitingFor", + "blocker": "Blocker", + "fact": "MemoryFact", + "memory_fact": "MemoryFact", + "memory": "MemoryFact", + "note": "Note", +} + +_OBJECT_TYPE_TO_BODY_KEY: dict[str, str] = { + "Note": "body", + "MemoryFact": "fact_text", + "Decision": "decision_text", + "Commitment": "commitment_text", + "WaitingFor": "waiting_for_text", + "Blocker": "blocking_reason", + "NextAction": "action_text", +} + +_OBJECT_TYPE_TO_PREFIX: dict[str, str] = { + "Decision": "Decision", + "Commitment": "Commitment", + "WaitingFor": "Waiting For", + "Blocker": "Blocker", + "NextAction": "Next Action", + "MemoryFact": "Memory Fact", + "Note": "Note", +} + +_DEFAULT_CONFIDENCE = 0.82 +_SUPPORTED_WORKSPACE_FILENAMES = ( + "workspace.json", + "openclaw_workspace.json", +) +_SUPPORTED_MEMORY_FILENAMES = ( + "durable_memory.json", + "memories.json", + "openclaw_memories.json", +) + + +def _truncate(value: str, 
*, max_length: int) -> str: + if len(value) <= max_length: + return value + return value[: max_length - 3].rstrip() + "..." + + +def _normalize_object_type(value: object) -> str: + normalized = normalize_optional_text(value) + if normalized is None: + return "Note" + + if normalized in _OBJECT_TYPE_TO_BODY_KEY: + return normalized + + lowered = normalized.casefold().replace("-", "_").replace(" ", "_") + return _OPENCLAW_TYPE_TO_OBJECT_TYPE.get(lowered, "Note") + + +def _build_body(*, object_type: str, text: str, raw_entry: JsonObject) -> JsonObject: + body_key = _OBJECT_TYPE_TO_BODY_KEY[object_type] + return { + body_key: text, + "raw_import_text": text, + "openclaw_raw_entry": raw_entry, + } + + +def _build_title(*, object_type: str, text: str, explicit_title: str | None) -> str: + if explicit_title is not None: + return _truncate(explicit_title, max_length=280) + prefix = _OBJECT_TYPE_TO_PREFIX[object_type] + return _truncate(f"{prefix}: {text}", max_length=280) + + +def _build_raw_content(*, object_type: str, text: str) -> str: + prefix = _OBJECT_TYPE_TO_PREFIX[object_type] + return f"{prefix}: {text}" + + +def _read_json(path: Path) -> object: + try: + return json.loads(path.read_text(encoding="utf-8")) + except json.JSONDecodeError as exc: + raise OpenClawAdapterValidationError( + f"invalid JSON at {path}: {exc.msg}" + ) from exc + + +def _extract_workspace_payloads(payload: object) -> tuple[JsonObject | None, list[JsonObject]]: + if isinstance(payload, list): + entries = [ensure_json_object(item, field_name="entry") for item in payload] + return None, entries + + if not isinstance(payload, dict): + raise OpenClawAdapterValidationError("OpenClaw source root must be a JSON object or array") + + workspace_payload = payload.get("workspace") + if workspace_payload is not None and not isinstance(workspace_payload, dict): + raise OpenClawAdapterValidationError("workspace must be a JSON object when provided") + workspace_json = as_json_object(workspace_payload) if 
workspace_payload is not None else None + + for key in ("durable_memory", "memories", "items", "records"): + raw_entries = payload.get(key) + if raw_entries is None: + continue + if not isinstance(raw_entries, list): + raise OpenClawAdapterValidationError(f"{key} must be a JSON array") + entries = [ensure_json_object(item, field_name=f"{key}[]") for item in raw_entries] + return workspace_json, entries + + if workspace_payload is not None: + return workspace_json, [] + + # Single-record convenience format. + if payload.get("content") is not None or payload.get("text") is not None: + return None, [ensure_json_object(payload, field_name="payload")] + + raise OpenClawAdapterValidationError( + "OpenClaw payload must include one of: durable_memory, memories, items, or records" + ) + + +def _extract_scope_provenance(entry: JsonObject, *, raw_provenance: JsonObject) -> JsonObject: + entry_context = as_json_object(entry.get("context")) + + thread_id = pick_first_text( + entry.get("thread_id"), + raw_provenance.get("thread_id"), + entry_context.get("thread_id"), + ) + task_id = pick_first_text( + entry.get("task_id"), + raw_provenance.get("task_id"), + entry_context.get("task_id"), + ) + project = pick_first_text( + entry.get("project"), + entry.get("project_name"), + raw_provenance.get("project"), + entry_context.get("project"), + entry_context.get("project_name"), + ) + person = pick_first_text( + entry.get("person"), + entry.get("owner"), + raw_provenance.get("person"), + raw_provenance.get("owner"), + entry_context.get("person"), + entry_context.get("owner"), + ) + confirmation_status = pick_first_text( + entry.get("confirmation_status"), + raw_provenance.get("confirmation_status"), + entry_context.get("confirmation_status"), + ) + + source_event_ids = to_string_list(entry.get("source_event_ids")) + if not source_event_ids: + source_event_ids = to_string_list(raw_provenance.get("source_event_ids")) + if not source_event_ids: + source_event_ids = 
to_string_list(entry_context.get("source_event_ids")) + + payload: JsonObject = {} + if thread_id is not None: + payload["thread_id"] = thread_id + if task_id is not None: + payload["task_id"] = task_id + if project is not None: + payload["project"] = project + if person is not None: + payload["person"] = person + if confirmation_status is not None: + payload["confirmation_status"] = confirmation_status.casefold() + if source_event_ids: + payload["source_event_ids"] = source_event_ids + + tags = to_string_list(entry.get("tags")) + if tags: + payload["openclaw_tags"] = tags + + return payload + + +def _item_text(entry: JsonObject) -> str: + text = pick_first_text( + entry.get("text"), + entry.get("content"), + entry.get("summary"), + entry.get("message"), + ) + if text is None: + raise OpenClawAdapterValidationError("OpenClaw entry must include text/content/summary/message") + return text + + +def _normalize_entry( + *, + entry: JsonObject, + source_file: str, + entry_index: int, + workspace_id: str, +) -> OpenClawNormalizedItem: + source_identifier = pick_first_text( + entry.get("id"), + entry.get("memory_id"), + entry.get("entry_id"), + ) + source_item_id = source_identifier if source_identifier is not None else f"{source_file}:{entry_index + 1}" + + object_type = _normalize_object_type( + pick_first_text( + entry.get("object_type"), + entry.get("type"), + entry.get("kind"), + entry.get("category"), + ) + ) + status = parse_optional_status(entry.get("status")) or "active" + + text = _item_text(entry) + title = _build_title( + object_type=object_type, + text=text, + explicit_title=pick_first_text(entry.get("title")), + ) + raw_entry = as_json_object(entry) + + raw_provenance = as_json_object(entry.get("provenance")) + source_provenance = merge_json_objects( + _extract_scope_provenance(entry, raw_provenance=raw_provenance), + { + "openclaw_record_type": pick_first_text( + entry.get("type"), + entry.get("kind"), + entry.get("category"), + ) + or "unknown", + }, + ) + 
if source_identifier is not None: + source_provenance["openclaw_source_identifier"] = source_identifier + + confidence = parse_optional_confidence(entry.get("confidence")) + if confidence is None: + confidence = parse_optional_confidence(raw_provenance.get("confidence")) + if confidence is None: + confidence = _DEFAULT_CONFIDENCE + + dedupe_payload: JsonObject = { + "workspace_id": workspace_id, + "source_identifier": source_identifier, + "object_type": object_type, + "status": status, + "title": title, + "body": _build_body(object_type=object_type, text=text, raw_entry=raw_entry), + "source_provenance": source_provenance, + } + dedupe_key = sha256(canonical_json_string(dedupe_payload).encode("utf-8")).hexdigest() + + return OpenClawNormalizedItem( + source_item_id=source_item_id, + source_file=source_file, + object_type=object_type, + status=status, + raw_content=_build_raw_content(object_type=object_type, text=text), + title=title, + body=_build_body(object_type=object_type, text=text, raw_entry=raw_entry), + confidence=confidence, + source_provenance=source_provenance, + dedupe_key=dedupe_key, + ) + + +def _extract_context( + *, + source_path: Path, + workspace_payload: JsonObject | None, + fallback_fixture_id: str | None, +) -> OpenClawWorkspaceContext: + payload = workspace_payload or {} + workspace_id = pick_first_text( + payload.get("id"), + payload.get("workspace_id"), + fallback_fixture_id, + source_path.stem, + ) + if workspace_id is None: + raise OpenClawAdapterValidationError("workspace id could not be resolved") + + return OpenClawWorkspaceContext( + fixture_id=fallback_fixture_id, + workspace_id=workspace_id, + workspace_name=pick_first_text(payload.get("name"), payload.get("title")), + source_path=str(source_path), + ) + + +def load_openclaw_payload(source: str | Path) -> OpenClawNormalizedBatch: + source_path = Path(source).expanduser().resolve() + if not source_path.exists(): + raise OpenClawAdapterValidationError(f"OpenClaw source path does not 
exist: {source_path}") + + entries_by_file: list[tuple[str, list[JsonObject]]] = [] + workspace_payload: JsonObject | None = None + fixture_id: str | None = None + + if source_path.is_file(): + payload = _read_json(source_path) + parsed_workspace, entries = _extract_workspace_payloads(payload) + if isinstance(payload, dict): + fixture_id = normalize_optional_text(payload.get("fixture_id")) + workspace_payload = parsed_workspace + entries_by_file.append((source_path.name, entries)) + else: + for filename in _SUPPORTED_WORKSPACE_FILENAMES: + candidate = source_path / filename + if not candidate.exists(): + continue + payload = _read_json(candidate) + parsed_workspace, _ = _extract_workspace_payloads(payload) + workspace_payload = parsed_workspace or workspace_payload + if isinstance(payload, dict): + fixture_id = fixture_id or normalize_optional_text(payload.get("fixture_id")) + break + + for filename in _SUPPORTED_MEMORY_FILENAMES: + candidate = source_path / filename + if not candidate.exists(): + continue + payload = _read_json(candidate) + parsed_workspace, entries = _extract_workspace_payloads(payload) + if parsed_workspace is not None: + workspace_payload = parsed_workspace + if isinstance(payload, dict): + fixture_id = fixture_id or normalize_optional_text(payload.get("fixture_id")) + entries_by_file.append((filename, entries)) + + if not entries_by_file: + json_files = sorted(path for path in source_path.iterdir() if path.suffix == ".json") + for path in json_files: + payload = _read_json(path) + parsed_workspace, entries = _extract_workspace_payloads(payload) + if parsed_workspace is not None: + workspace_payload = parsed_workspace + if isinstance(payload, dict): + fixture_id = fixture_id or normalize_optional_text(payload.get("fixture_id")) + if entries: + entries_by_file.append((path.name, entries)) + + if not entries_by_file: + raise OpenClawAdapterValidationError("no OpenClaw memory entries were found at the source path") + + context = _extract_context( 
+ source_path=source_path, + workspace_payload=workspace_payload, + fallback_fixture_id=fixture_id, + ) + + normalized_items: list[OpenClawNormalizedItem] = [] + for source_file, entries in entries_by_file: + for index, entry in enumerate(entries): + normalized_items.append( + _normalize_entry( + entry=entry, + source_file=source_file, + entry_index=index, + workspace_id=context.workspace_id, + ) + ) + + if not normalized_items: + raise OpenClawAdapterValidationError("OpenClaw source did not contain any importable entries") + + return OpenClawNormalizedBatch( + context=context, + items=normalized_items, + ) + + +__all__ = [ + "OpenClawAdapterValidationError", + "load_openclaw_payload", +] diff --git a/apps/api/src/alicebot_api/openclaw_import.py b/apps/api/src/alicebot_api/openclaw_import.py new file mode 100644 index 0000000..5fb529a --- /dev/null +++ b/apps/api/src/alicebot_api/openclaw_import.py @@ -0,0 +1,124 @@ +from __future__ import annotations + +from pathlib import Path +from uuid import UUID + +from alicebot_api.openclaw_adapter import load_openclaw_payload +from alicebot_api.store import ContinuityStore, JsonObject + + +_OBJECT_TYPE_TO_SIGNAL: dict[str, str] = { + "Decision": "decision", + "NextAction": "next_action", + "Commitment": "commitment", + "WaitingFor": "waiting_for", + "Blocker": "blocker", + "MemoryFact": "remember_this", + "Note": "note", +} + + +def _existing_openclaw_dedupe_keys(store: ContinuityStore) -> set[str]: + dedupe_keys: set[str] = set() + for row in store.list_continuity_recall_candidates(): + provenance = row["provenance"] + if not isinstance(provenance, dict): + continue + if provenance.get("source_kind") != "openclaw_import": + continue + dedupe_key = provenance.get("openclaw_dedupe_key") + if isinstance(dedupe_key, str) and dedupe_key.strip() != "": + dedupe_keys.add(dedupe_key) + return dedupe_keys + + +def _deterministic_source_event_id(*, workspace_id: str, source_item_id: str) -> str: + return 
f"openclaw:{workspace_id}:{source_item_id}" + + +def import_openclaw_source( + store: ContinuityStore, + *, + user_id: UUID, + source: str | Path, +) -> JsonObject: + del user_id + + batch = load_openclaw_payload(source) + existing_dedupe_keys = _existing_openclaw_dedupe_keys(store) + run_dedupe_keys: set[str] = set() + + imported_object_ids: list[str] = [] + imported_capture_ids: list[str] = [] + skipped_duplicates = 0 + + for item in batch.items: + if item.dedupe_key in existing_dedupe_keys or item.dedupe_key in run_dedupe_keys: + skipped_duplicates += 1 + continue + + run_dedupe_keys.add(item.dedupe_key) + + capture = store.create_continuity_capture_event( + raw_content=item.raw_content, + explicit_signal=_OBJECT_TYPE_TO_SIGNAL[item.object_type], + admission_posture="DERIVED", + admission_reason="openclaw_import", + ) + + source_event_ids = item.source_provenance.get("source_event_ids") + if not isinstance(source_event_ids, list) or len(source_event_ids) == 0: + source_event_ids = [ + _deterministic_source_event_id( + workspace_id=batch.context.workspace_id, + source_item_id=item.source_item_id, + ) + ] + + provenance: JsonObject = { + **item.source_provenance, + "source_event_ids": source_event_ids, + "source_kind": "openclaw_import", + "openclaw_workspace_id": batch.context.workspace_id, + "openclaw_workspace_name": batch.context.workspace_name, + "openclaw_fixture_id": batch.context.fixture_id, + "openclaw_source_path": batch.context.source_path, + "openclaw_source_file": item.source_file, + "openclaw_source_item_id": item.source_item_id, + "openclaw_dedupe_key": item.dedupe_key, + "openclaw_dedupe_posture": "workspace_and_payload_fingerprint", + } + + continuity_object = store.create_continuity_object( + capture_event_id=capture["id"], + object_type=item.object_type, + status=item.status, + title=item.title, + body=item.body, + provenance=provenance, + confidence=item.confidence, + ) + + imported_capture_ids.append(str(capture["id"])) + 
imported_object_ids.append(str(continuity_object["id"])) + + imported_count = len(imported_object_ids) + status = "ok" if imported_count > 0 else "noop" + + return { + "status": status, + "source_path": batch.context.source_path, + "fixture_id": batch.context.fixture_id, + "workspace_id": batch.context.workspace_id, + "workspace_name": batch.context.workspace_name, + "total_candidates": len(batch.items), + "imported_count": imported_count, + "skipped_duplicates": skipped_duplicates, + "dedupe_posture": "workspace_and_payload_fingerprint", + "provenance_source_kind": "openclaw_import", + "imported_capture_event_ids": imported_capture_ids, + "imported_object_ids": imported_object_ids, + } + + +__all__ = ["import_openclaw_source"] diff --git a/apps/api/src/alicebot_api/openclaw_models.py b/apps/api/src/alicebot_api/openclaw_models.py new file mode 100644 index 0000000..1badb4f --- /dev/null +++ b/apps/api/src/alicebot_api/openclaw_models.py @@ -0,0 +1,200 @@ +from __future__ import annotations + +from dataclasses import dataclass +import json + +from alicebot_api.store import JsonObject + + +CONTINUITY_IMPORT_STATUSES = { + "active", + "stale", + "completed", + "cancelled", + "superseded", +} + + +class OpenClawAdapterValidationError(ValueError): + """Raised when an OpenClaw import payload is invalid.""" + + +@dataclass(frozen=True, slots=True) +class OpenClawWorkspaceContext: + fixture_id: str | None + workspace_id: str + workspace_name: str | None + source_path: str + + +@dataclass(frozen=True, slots=True) +class OpenClawNormalizedItem: + source_item_id: str + source_file: str + object_type: str + status: str + raw_content: str + title: str + body: JsonObject + confidence: float + source_provenance: JsonObject + dedupe_key: str + + +@dataclass(frozen=True, slots=True) +class OpenClawNormalizedBatch: + context: OpenClawWorkspaceContext + items: list[OpenClawNormalizedItem] + + +def normalize_optional_text(value: object) -> str | None: + if not isinstance(value, str): 
+ return None + normalized = " ".join(value.split()).strip() + if normalized == "": + return None + return normalized + + +def normalize_required_text(value: object, *, field_name: str) -> str: + normalized = normalize_optional_text(value) + if normalized is None: + raise OpenClawAdapterValidationError(f"{field_name} must be a non-empty string") + return normalized + + +def parse_optional_confidence(value: object) -> float | None: + if value is None: + return None + + if isinstance(value, bool): + raise OpenClawAdapterValidationError("confidence must be a number") + + if isinstance(value, (int, float)): + parsed = float(value) + elif isinstance(value, str): + stripped = value.strip() + if stripped == "": + return None + try: + parsed = float(stripped) + except ValueError as exc: + raise OpenClawAdapterValidationError("confidence must be a number") from exc + else: + raise OpenClawAdapterValidationError("confidence must be a number") + + if parsed < 0.0 or parsed > 1.0: + raise OpenClawAdapterValidationError("confidence must be between 0.0 and 1.0") + return parsed + + +def parse_optional_status(value: object) -> str | None: + normalized = normalize_optional_text(value) + if normalized is None: + return None + lowered = normalized.casefold() + if lowered not in CONTINUITY_IMPORT_STATUSES: + supported = ", ".join(sorted(CONTINUITY_IMPORT_STATUSES)) + raise OpenClawAdapterValidationError( + f"status must be one of: {supported}" + ) + return lowered + + +def ensure_json_object(value: object, *, field_name: str) -> JsonObject: + if not isinstance(value, dict): + raise OpenClawAdapterValidationError(f"{field_name} must be a JSON object") + return value + + +def canonicalize_json(value: object) -> object: + if isinstance(value, dict): + return { + str(key): canonicalize_json(value[key]) + for key in sorted(value) + } + if isinstance(value, list): + return [canonicalize_json(item) for item in value] + return value + + +def canonical_json_string(value: object) -> str: + 
return json.dumps( + canonicalize_json(value), + sort_keys=True, + separators=(",", ":"), + ensure_ascii=True, + ) + + +def as_json_object(value: object) -> JsonObject: + if not isinstance(value, dict): + return {} + output: JsonObject = {} + for key, child in value.items(): + if not isinstance(key, str): + continue + output[key] = _as_json_value(child) + return output + + +def _as_json_value(value: object): + if value is None or isinstance(value, (str, int, float, bool)): + return value + if isinstance(value, list): + return [_as_json_value(item) for item in value] + if isinstance(value, dict): + return as_json_object(value) + return str(value) + + +def merge_json_objects(*payloads: JsonObject) -> JsonObject: + merged: JsonObject = {} + for payload in payloads: + merged.update(payload) + return merged + + +def pick_first_text(*candidates: object) -> str | None: + for candidate in candidates: + normalized = normalize_optional_text(candidate) + if normalized is not None: + return normalized + return None + + +def to_string_list(value: object) -> list[str]: + if isinstance(value, str): + normalized = normalize_optional_text(value) + return [] if normalized is None else [normalized] + + if isinstance(value, list): + items: list[str] = [] + seen: set[str] = set() + for raw in value: + normalized = normalize_optional_text(raw) + if normalized is None or normalized in seen: + continue + items.append(normalized) + seen.add(normalized) + return items + + return [] + + +__all__ = [ + "OpenClawAdapterValidationError", + "OpenClawNormalizedBatch", + "OpenClawNormalizedItem", + "OpenClawWorkspaceContext", + "as_json_object", + "canonical_json_string", + "ensure_json_object", + "merge_json_objects", + "normalize_optional_text", + "normalize_required_text", + "parse_optional_confidence", + "parse_optional_status", + "pick_first_text", + "to_string_list", +] diff --git a/docs/adr/ADR-004-openclaw-integration-boundary.md b/docs/adr/ADR-004-openclaw-integration-boundary.md new file 
mode 100644 index 0000000..89eb2ac --- /dev/null +++ b/docs/adr/ADR-004-openclaw-integration-boundary.md @@ -0,0 +1,50 @@ +# ADR-004: OpenClaw Integration Boundary + +## Status + +Accepted (2026-04-07) + +## Context + +`P9-S36` is the first external adapter sprint after the shipped public-core (`P9-S33`), CLI (`P9-S34`), and MCP transport (`P9-S35`) seams. The product goal for this sprint is proving Alice can ingest external agent memory while preserving the same continuity semantics already shipped. + +A broad importer framework in this sprint would add contract risk and blur the boundary between adapter work and platform work. + +## Decision + +Adopt a narrow OpenClaw-first integration boundary in `P9-S36`: + +- support file-based OpenClaw import only (JSON file or workspace directory with JSON memory payloads) +- map OpenClaw memory entries into shipped Alice continuity objects (no bypass path) +- preserve explicit imported provenance with `source_kind = openclaw_import` +- apply deterministic dedupe using a stable workspace+payload fingerprint +- keep MCP augmentation limited to existing shipped tools (`alice_recall`, `alice_resume`, etc.) 
without adding new MCP tools + +Input contract for this sprint is intentionally small: + +- root object payloads with one of `durable_memory`, `memories`, `items`, or `records` +- optional `workspace` metadata object +- optional directory contract using known JSON filenames (`workspace.json`, `openclaw_workspace.json`, `durable_memory.json`, `memories.json`, `openclaw_memories.json`) + +## Consequences + +Positive: + +- proves real external adapter ingestion without reopening continuity semantics +- keeps import behavior auditable and deterministic +- gives a concrete template for future importer work in `P9-S37` + +Negative: + +- does not yet provide generalized multi-source importer abstractions +- non-OpenClaw sources remain out of scope for this sprint + +## Alternatives Considered + +### Introduce a generic importer framework in `P9-S36` + +Rejected because it increases scope and contract surface before the first concrete adapter is proven. + +### Add new MCP import tools for OpenClaw + +Rejected because MCP surface expansion is out of `P9-S36` scope and would dilute parity guarantees with shipped continuity seams. diff --git a/docs/archive/planning/2026-04-07-phase9-bootstrap/ARCHITECTURE.md b/docs/archive/planning/2026-04-07-phase9-bootstrap/ARCHITECTURE.md new file mode 100644 index 0000000..93e0b8a --- /dev/null +++ b/docs/archive/planning/2026-04-07-phase9-bootstrap/ARCHITECTURE.md @@ -0,0 +1,107 @@ +# Architecture + +## Current Implemented Slice + +AliceBot now implements the accepted repo slice through Phase 3 Sprint 9. + +- `apps/api` is the core shipped surface. 
It provides continuity storage and review over `users`, `threads`, `sessions`, and append-only `events`; deterministic context compilation; governed memory admission and review plus open-loop lifecycle capture/review; embeddings and semantic retrieval; entities and entity edges; policy, tool, approval, and execution governance; the no-tools assistant-response seam at `POST /v0/responses`; explicit task and task-step lifecycle reads and mutations; rooted local task workspaces and artifact ingestion; artifact chunk retrieval and embeddings; and narrow read-only Gmail and Calendar seams with external-secret-backed credentials plus bounded Calendar event discovery and selected-item ingestion into the artifact pipeline. +- `apps/web` is a shipped operator shell over those backend seams, not a scaffold-only placeholder. The current routes are `/`, `/chat`, `/approvals`, `/tasks`, `/artifacts`, `/gmail`, `/calendar`, `/memories`, `/chief-of-staff`, `/entities`, and `/traces`. The shell can read live backend seams when configured and otherwise falls back to explicit fixture states instead of pretending the backend is connected. +- `/chat` now carries both shipped operator modes: governed request composition and assistant-response mode. It uses visible thread selection instead of a raw typed thread id, supports compact thread creation through the continuity API, renders a selected-thread transcript from immutable continuity events, and keeps supporting session and operational review, thread-linked governed workflow review, ordered task-step timeline review, and bounded explain-why trace review in the right rail. The rail now also includes a manual explicit-signal capture control that triggers `POST /v0/memories/capture-explicit-signals` only from explicit operator action on selected `message.user` events. 
+- `/gmail` and `/calendar` are shipped bounded connector workspaces over existing backend seams: visible account list review, selected-account detail, explicit account connection, and explicit single-item ingestion into one chosen task workspace, with live/fixture/unavailable states kept explicit. +- `/artifacts`, `/memories`, `/chief-of-staff`, and `/entities` are now shipped bounded review workspaces that expose existing artifact, memory (including open-loop review), chief-of-staff priority plus follow-through plus preparation/resumption plus weekly review/outcome-learning plus handoff-queue/review supervision, and entity read seams with explicit live/fixture/unavailable modes. +- `workers` remains scaffold-only. No background runner, automatic multi-step progression, or asynchronous job system is implemented. + +The repo is intentionally still narrow. Document ingestion remains local and deterministic. The only live execution handler is the no-external-I/O `proxy.echo` path. Gmail and Calendar remain read-only; Calendar includes bounded event discovery and selected-event ingestion only. Rich parsing, mailbox sync, attachments, broader Calendar capabilities, broader proxying, and runner-style orchestration are still planned later. + +## Implemented Now + +### Runtime + +- `docker-compose.yml` starts local Postgres with `pgvector`, Redis, and MinIO. +- `scripts/dev_up.sh`, `scripts/migrate.sh`, and `scripts/api_dev.sh` provide the local startup path. +- `scripts/run_phase3_acceptance.py`, `scripts/run_phase3_readiness_gates.py`, and `scripts/run_phase3_validation_matrix.py` are the canonical Phase 3 gate entrypoints; they preserve deterministic gate semantics by delegating to existing Phase 2 implementations. `scripts/run_phase2_*.py` and `scripts/run_mvp_*.py` remain supported compatibility entrypoints with identical semantics. 
+- `apps/api` exposes FastAPI endpoints for: + - continuity and response generation: `/healthz`, `POST /v0/threads`, `GET /v0/threads`, `GET /v0/threads/{thread_id}`, `GET /v0/threads/{thread_id}/sessions`, `GET /v0/threads/{thread_id}/events`, `GET /v0/threads/{thread_id}/resumption-brief`, `GET /v0/chief-of-staff`, `POST /v0/chief-of-staff/recommendation-outcomes`, `POST /v0/chief-of-staff/handoff-review-actions`, `POST /v0/context/compile`, `POST /v0/responses` + - memory and open-loop seams, including `POST /v0/memories/admit`, `POST /v0/memories/capture-explicit-signals`, `GET /v0/open-loops`, `GET /v0/open-loops/{open_loop_id}`, `POST /v0/open-loops`, `POST /v0/open-loops/{open_loop_id}/status`, and `POST /v0/open-loops/extract-explicit-commitments` + - embeddings and graph seams + - policy, tool, approval, execution-budget, and proxy execution governance + - task, task-step, task-workspace, task-artifact, artifact-chunk, and trace review reads and mutations + - narrow Gmail account connect/read plus selected-message ingestion + - narrow Calendar account connect/read, bounded event discovery, plus selected-event ingestion +- `apps/web` exposes the current operator shell: + - `/`: bounded home view over the shipped shell surfaces + - `/chat`: assistant mode, governed request mode, thread selection, thread creation, transcript-first continuity review, deterministic resumption brief review, manual explicit-signal capture control for selected `message.user` events, thread-linked governed workflow and task-step timeline review, bounded explain-why embedding, and bounded supporting operational review + - `/approvals`: approval inbox and execution review + - `/tasks`: task summary and ordered task-step review + - `/artifacts`: artifact list and selected detail, linked workspace summary, and ordered chunk review + - `/gmail`: connected-account review, selected-account detail, explicit connect, and selected-message ingestion into one chosen task workspace + - 
`/calendar`: connected-account review, selected-account detail, explicit connect, and selected-event ingestion into one chosen task workspace + - `/memories`: memory summary and queue posture, selected detail, revision review, label review, plus open-loop summary/list/detail review + - `/chief-of-staff`: deterministic priority dashboard plus deterministic follow-through supervision, deterministic preparation/resumption artifacts (`preparation_brief`, `what_changed_summary`, `prep_checklist`, `suggested_talking_points`, `resumption_supervision`), deterministic weekly review/outcome-learning artifacts (`weekly_review_brief`, `recommendation_outcomes`, `priority_learning_summary`, `pattern_drift_summary`), and deterministic handoff queue/review artifacts (`handoff_queue_summary`, `handoff_queue_groups`, `handoff_review_actions`) with explicit lifecycle posture (`ready`, `pending_approval`, `executed`, `stale`, `expired`) and auditable review-action controls + - `/entities`: entity list and selected detail with related edge review + - `/traces`: trace summary, detail, and ordered event review +- `tests` cover both the backend seams and the web shell. Durable repo evidence includes integration coverage for continuity and responses plus Vitest coverage for `/chat`, thread selection, bounded continuity review, and assistant-response submission. + +### Data And Safety Boundaries + +- Postgres is the system of record. +- Row-level security is enforced on user-owned continuity, trace, memory, governance, task, workspace, artifact, Gmail, and Calendar tables. +- `events`, `trace_events`, and `memory_revisions` are append-only by contract. +- Task-step lineage and execution linkage stay explicit through `parent_step_id`, `source_approval_id`, `source_execution_id`, and `tool_executions.task_step_id`. +- Task workspaces are rooted local directories under `TASK_WORKSPACE_ROOT`. 
+- Task artifacts are explicit rooted local-file registrations only; compile and retrieval read persisted chunk rows, not raw files. +- Gmail and Calendar credential material stay off normal metadata tables and flow through dedicated secret-manager seams. + +## Core Flows Implemented Now + +### Continuity And Chat + +1. `POST /v0/threads` creates one visible thread. +2. `GET /v0/threads`, `GET /v0/threads/{thread_id}`, `GET /v0/threads/{thread_id}/sessions`, and `GET /v0/threads/{thread_id}/events` expose bounded continuity review over persisted records. +3. `GET /v0/threads/{thread_id}/resumption-brief` assembles a deterministic bounded resumption snapshot for the selected thread. +4. `POST /v0/responses` compiles context deterministically, persists the submitted user message plus the assistant reply as immutable events, and returns linked compile and response trace metadata. +5. `/chat` consumes those shipped seams directly. Selected-thread identity stays explicit across assistant and governed-request modes, immutable thread events drive the primary transcript surface, and non-conversation continuity stays in bounded supporting review instead of polluting the main conversation record. +6. `/chat` explicit-signal capture is bounded and manual: the right rail surfaces eligible `message.user` events, capture runs only on button click through `POST /v0/memories/capture-explicit-signals`, and fixture/unavailable states remain explicitly non-destructive/disabled. + +### Memory And Open-Loop Review + +1. `POST /v0/memories/admit` supports the typed memory admission seam and optional open-loop creation in the same admission request. +2. Open loops persist as a first-class lifecycle seam with deterministic status transitions (`open`, `resolved`, `dismissed`) and strict user scoping on list/detail/mutation reads. +3. `POST /v0/context/compile` includes a bounded, deterministically ordered open-loop slice and summary when open loops are present. +4. 
`/memories` exposes open-loop review alongside memory review so operators can inspect unresolved commitments and selected lifecycle details in one surface. + +### Governance, Tasks, And Explainability + +1. `POST /v0/approvals/requests` creates one task and one initial task step for each governed request. +2. Approval resolution and approved execution reuse explicit task-step linkage instead of inferring from first-step-only assumptions. +3. `GET /v0/approvals`, `GET /v0/tasks`, `GET /v0/tasks/{task_id}/steps`, `GET /v0/tool-executions`, `GET /v0/traces`, and related detail reads expose durable review state through the web shell, including thread-linked workflow review and ordered task-step timeline review in `/chat`. +4. The shipped explainability surface is calm and bounded: `/chat` embeds selected-thread explain-why review over linked trace targets, with summary first, detail second, and ordered trace events last. + +### Workspaces, Artifacts, Gmail, And Calendar + +1. Tasks can provision one rooted local workspace and register local artifacts under that boundary. +2. Artifact ingestion supports the narrow current set only: plain text, markdown, narrow local PDF text extraction, narrow DOCX text extraction from `word/document.xml`, and narrow RFC822 email extraction. +3. Artifact retrieval works over persisted chunk rows and persisted chunk embeddings, including deterministic lexical retrieval, direct semantic retrieval, and the current lexical-first hybrid compile merge. +4. Gmail remains narrow: one read-only account seam, secret-free account reads, external-secret-backed primary credentials, refresh-token renewal and rotation handling, and one selected-message ingestion path that lands in the existing RFC822 artifact workflow. +5. 
Calendar remains narrow: one read-only account seam, secret-free account reads, external-secret-backed credentials, bounded event discovery (`GET /v0/calendar-accounts/{calendar_account_id}/events`) with deterministic ordering and bounded limits, and one selected-event ingestion path that lands in the existing text artifact/chunk workflow. + +## Testing Coverage Implemented Now + +- Backend continuity and response seams are covered in `tests/integration/test_continuity_api.py`, `tests/integration/test_continuity_store.py`, `tests/integration/test_responses_api.py`, and related unit coverage under `tests/unit`. +- Web continuity and `/chat` operator review adoption are covered in `apps/web/app/chat/page.test.tsx`, `apps/web/components/thread-list.test.tsx`, `apps/web/components/thread-summary.test.tsx`, `apps/web/components/thread-event-list.test.tsx`, `apps/web/components/thread-create.test.tsx`, `apps/web/components/response-composer.test.tsx`, `apps/web/components/thread-workflow-panel.test.tsx`, `apps/web/components/task-step-list.test.tsx`, `apps/web/components/response-history.test.tsx`, and `apps/web/components/thread-trace-panel.test.tsx`, including manual explicit-signal capture control behavior, live/fixture/unavailable gating, deterministic result/error rendering, and continuity-rendering regression assertions in `thread-event-list` tests. 
+- Review workspaces are covered at route level in `apps/web/app/artifacts/page.test.tsx`, `apps/web/app/memories/page.test.tsx`, `apps/web/app/chief-of-staff/page.test.tsx`, and `apps/web/app/entities/page.test.tsx`, with matching component and API-client coverage under `apps/web` (including `apps/web/components/chief-of-staff-follow-through-panel.test.tsx`, `apps/web/components/chief-of-staff-preparation-panel.test.tsx`, `apps/web/components/chief-of-staff-weekly-review-panel.test.tsx`, `apps/web/components/chief-of-staff-handoff-queue-panel.test.tsx`, and `apps/web/lib/api.test.ts` chief-of-staff contract assertions). +- Connector workspaces are covered through `apps/web/lib/api.test.ts`, `apps/web/components/gmail-account-list.test.tsx`, `apps/web/components/calendar-account-list.test.tsx`, and `apps/web/components/calendar-event-ingest-form.test.tsx`. +- The shell also has route and API-client coverage for approvals, tasks, traces, and shared API utilities under `apps/web`. + +## Planned Later + +The following remain planned later and must not be described as implemented: + +- runner-style orchestration and automatic multi-step progression +- auth beyond the current database user-context model +- richer document parsing, OCR, image extraction, or layout reconstruction +- Gmail search, mailbox sync, attachment ingestion, write-capable Gmail actions, and broader Calendar capabilities such as recurrence expansion, sync, and write actions +- broader proxy execution breadth or real-world side effects beyond `proxy.echo` +- retrieval reranking or weighted fusion beyond the current lexical-first hybrid compile merge + +Future planning should start from the shipped API-plus-web-shell baseline above, not from older Gmail-era or scaffold-era descriptions. 
diff --git a/docs/archive/planning/2026-04-07-phase9-bootstrap/PRODUCT_BRIEF.md b/docs/archive/planning/2026-04-07-phase9-bootstrap/PRODUCT_BRIEF.md new file mode 100644 index 0000000..6846e36 --- /dev/null +++ b/docs/archive/planning/2026-04-07-phase9-bootstrap/PRODUCT_BRIEF.md @@ -0,0 +1,77 @@ +# Product Brief + +## Product Summary + +AliceBot is a private, permissioned personal AI operating system for a single primary user. It is designed to preserve durable personal context, retrieve the right context at the right time, and move safely from conversation to action without hiding why it acted. + +## Problem + +General-purpose assistants forget preferences, prior decisions, and relationships across sessions. They also make it difficult to audit why they answered a certain way or whether a tool action was properly governed. The result is low trust, repeated user effort, and unsafe action handling. + +## Target Users + +- Primary v1 user: one power user with recurring life and work workflows. +- Delivery model: a human lead working with AI builders and reviewers. +- Architectural assumption: v1 UX is single-user, but the data model must support strict per-user isolation from day one. + +## Core Value Proposition + +- Durable memory for preferences, relationships, prior decisions, and recurring tasks. +- Deterministic context compilation instead of ad hoc prompt stuffing. +- Safe action orchestration with policy checks, approvals, and budgets. +- Clear explainability through traces, memory evidence, and tool history. + +## V1 Scope + +- Web-based chat and task orchestration. +- Immutable thread and session continuity. +- Structured memory with admission controls, revision history, and user review. +- Entity and relationship tracking for people, merchants, products, projects, and routines. +- Hybrid retrieval across memories, entities, relationships, and documents. +- Policy engine, tool proxy, approval workflows, and task budgets. 
+- Scoped task workspaces and artifact storage. +- Read-only document ingestion plus read-only Gmail and Calendar connectors. +- Hot consolidation for immediate truth updates and cold consolidation for cleanup and summarization. +- Explain-why views for important responses and actions. + +## Non-Goals + +- Autonomous side effects without user approval. +- Multi-user collaboration UX in v1. +- Mobile-first delivery. +- Dedicated graph or vector infrastructure in v1. +- Browser automation, write-capable connectors, proactive automations, and voice at launch. + +## Key User Journeys + +1. Ask a question that depends on prior preferences, purchases, or relationships and get a context-aware answer without restating history. +2. Correct a preference or fact and have the next turn reflect the new truth immediately. +3. Inspect why the system answered or proposed an action by reviewing memories, retrieval choices, and tool traces. +4. Run a repeat-purchase workflow that gathers prior context, proposes the order, pauses for approval, and records the outcome. +5. Retrieve relevant context from documents, Gmail, or Calendar without granting write access. + +## Constraints + +- Single-user product experience, multi-tenant-safe architecture. +- Web-first v1. +- Explicit approval for consequential actions. +- Operational simplicity beats platform sprawl in v1. +- Memory quality, retrieval quality, and explainability are ship-gating concerns. + +## Success Criteria + +- The system recalls relevant preferences, past purchases, relationships, and prior decisions without repeated user restatement. +- The repeat magnesium reorder workflow succeeds end to end with approval gating and memory write-back. +- Every consequential action is explainable through trace, memory, rule, and tool evidence. +- Purchases, emails, bookings, and other side effects never occur without explicit approval. +- Standard retrieval-plus-response interactions reach p95 latency under 5 seconds. 
+- Prompt and cache reuse exceeds 70% on repeated patterns. +- Memory extraction precision exceeds 80% at ship. + +## Product Non-Negotiables + +- The user stays in control of consequential actions. +- Durable context must come from governed storage, not raw transcript stuffing. +- Explainability is a product requirement, not a debugging feature. +- Preference contradictions must be reflected immediately. +- The repeat magnesium reorder scenario is the canonical v1 release-readiness validation scenario. diff --git a/docs/archive/planning/2026-04-07-phase9-bootstrap/README.md b/docs/archive/planning/2026-04-07-phase9-bootstrap/README.md new file mode 100644 index 0000000..303cb97 --- /dev/null +++ b/docs/archive/planning/2026-04-07-phase9-bootstrap/README.md @@ -0,0 +1,149 @@ +# AliceBot + +AliceBot is a private, permissioned personal AI operating system. The canonical baseline remains through Phase 3 Sprint 9, with earlier Phase 4 work already delivering run linkage/idempotent replay safety and run observability/retry-failure discipline, Phase 4 Sprint 14 establishing canonical MVP release-gate ownership in Phase 4 gate scripts, Phase 4 Sprint 15 adding deterministic release-candidate rehearsal evidence packaging, Phase 4 Sprint 16 adding durable archive/index evidence retention for repeated RC rehearsal runs, Phase 4 Sprint 17 hardening archive index writes with deterministic locking and atomic replace behavior under contention, Phase 4 Sprint 18 adding deterministic MVP exit manifest generation/verification for formal phase closeout, and Phase 4 Sprint 19 adding deterministic MVP qualification orchestration plus a formal GO/NO_GO sign-off record/verifier. 
Phase 5 Sprint 17 shipped the typed continuity capture backbone, Phase 5 Sprint 18 shipped provenance-backed recall plus deterministic continuity resumption briefs, Phase 5 Sprint 19 shipped continuity review/correction with explicit freshness and supersession posture, Phase 5 Sprint 20 shipped open-loop dashboard plus deterministic daily/weekly review flows, Phase 6 Sprint 21 shipped canonical memory-quality gate semantics plus deterministic memory review-queue prioritization, Phase 6 Sprint 22 shipped retrieval-quality evaluation plus continuity-recall ranking calibration, Phase 6 Sprint 23 shipped correction-impact and freshness-hygiene weekly reliability signals, and Phase 6 Sprint 24 shipped trust dashboard and quality release evidence seams. Phase 6 is complete, Phase 7 is complete (`P7-S25` through `P7-S28`), Phase 8 Sprint 29 (P8-S29), Sprint 30 (P8-S30), and Sprint 31 (P8-S31) are shipped baseline, and the active sprint packet is Phase 8 Sprint 32 (P8-S32): outcome learning and closure quality. Phase 8 planning anchors are `docs/phase8-product-spec.md` and `docs/phase8-sprint-29-32-plan.md`. + +## Current Implemented Slice + +- `apps/api` is the core shipped surface. It includes continuity, context compilation, assistant responses, typed memory admission/review and open-loop lifecycle seams, deterministic thread resumption brief reads, explicit-signal capture, policy/tool/approval governance, execution budgets, tasks and task steps, rooted local workspaces and artifacts, artifact retrieval, traces, and narrow read-only Gmail and Calendar seams with bounded event discovery plus selected-item ingestion. 
+- `apps/api` now also ships Phase 7 Sprint 27 chief-of-staff preparation and resumption seams: + - `GET /v0/chief-of-staff` continues deterministic P7-S25/P7-S26 ranking/follow-through output and now also includes: + - `preparation_brief` + - `what_changed_summary` + - `prep_checklist` + - `suggested_talking_points` + - `resumption_supervision` + - preparation and resumption recommendations are provenance-backed and trust-calibrated with explicit confidence posture. + - low-trust memory posture visibly downgrades recommendation confidence in preparation and resumption artifacts. + - `draft_follow_up` remains draft-only artifact output with explicit approval-bounded non-send posture (`mode=draft_only`, `approval_required=true`, `auto_send=false`). + - all chief-of-staff composition still reuses shipped continuity + trust inputs (`continuity/recall`, `continuity/open-loops`, `continuity/resumption-brief`, `memories/trust-dashboard`) without widening connector or side-effect scope. +- `apps/api` now also ships Phase 7 Sprint 28 chief-of-staff weekly review and outcome-learning seams: + - `GET /v0/chief-of-staff` now also includes deterministic `weekly_review_brief`, `recommendation_outcomes`, `priority_learning_summary`, and `pattern_drift_summary`. + - `POST /v0/chief-of-staff/recommendation-outcomes` captures explicit recommendation outcomes (`accept`, `defer`, `ignore`, `rewrite`) as auditable continuity records. + - weekly-review guidance is explicit and deterministic for close/defer/escalate decisions. +- `apps/api` now also ships Phase 8 Sprint 29 chief-of-staff action handoff seams: + - `GET /v0/chief-of-staff` now also includes deterministic `action_handoff_brief`, `handoff_items`, `task_draft`, `approval_draft`, and `execution_posture`. + - handoff items deterministically map top recommendations from priority/follow-through/preparation/weekly-review signals into governed task/approval-ready draft structures with explicit rationale and provenance. 
+ - execution posture is explicit and non-autonomous (`approval_bounded_artifact_only`, approval required, no autonomous execution or external side effects). +- `apps/api` now also ships Phase 8 Sprint 30 chief-of-staff handoff queue and review seams: + - `GET /v0/chief-of-staff` now also includes deterministic `handoff_queue_summary`, `handoff_queue_groups`, and `handoff_review_actions`. + - queue lifecycle posture is explicit (`ready`, `pending_approval`, `executed`, `stale`, `expired`) with deterministic grouped ordering metadata. + - `POST /v0/chief-of-staff/handoff-review-actions` captures explicit operator review actions for lifecycle transitions as auditable continuity records. + - stale and expired handoff items remain visible in grouped queue output and are not silently dropped. +- `apps/api` now also ships Phase 8 Sprint 31 chief-of-staff governed execution routing seams: + - `GET /v0/chief-of-staff` now also includes deterministic `execution_routing_summary`, `routed_handoff_items`, `routing_audit_trail`, and `execution_readiness_posture`. + - `POST /v0/chief-of-staff/execution-routing-actions` captures explicit routing transitions into governed draft targets (`task_workflow_draft`, `approval_workflow_draft`, `follow_up_draft_only`). + - routing transitions are explicit and auditable (`routed`, `reaffirmed`) while keeping approval-required, draft-only non-autonomous posture. +- `apps/api` now also ships Phase 8 Sprint 32 chief-of-staff outcome-learning and closure-quality seams: + - `GET /v0/chief-of-staff` now also includes deterministic `handoff_outcome_summary`, `handoff_outcomes`, `closure_quality_summary`, `conversion_signal_summary`, and `stale_ignored_escalation_posture`. + - `POST /v0/chief-of-staff/handoff-outcomes` captures explicit routed-handoff outcomes (`reviewed`, `approved`, `rejected`, `rewritten`, `executed`, `ignored`, `expired`) as immutable continuity records. 
+ - closure-quality, recommendation-to-execution conversion, and stale/ignored escalation posture are derived deterministically from latest immutable outcome state per handoff item. + - outcome learning remains explicit and approval-bounded; no autonomous execution or connector side effects are introduced. +- `apps/api` now also ships Phase 6 Sprint 21 memory-quality seams: + - `GET /v0/memories/quality-gate` for canonical server-side quality posture (`healthy`, `needs_review`, `insufficient_sample`, `degraded`) with deterministic computation counts. + - `GET /v0/memories/review-queue` supports explicit deterministic priority modes: + - `oldest_first` + - `recent_first` + - `high_risk_first` + - `stale_truth_first` + - review-queue payloads include explicit ordering metadata and per-item priority posture fields. +- `apps/api` now also ships Phase 6 Sprint 22 retrieval-quality seams: + - `GET /v0/continuity/retrieval-evaluation` returns deterministic fixture-backed precision summaries and top-result ordering evidence. + - `GET /v0/continuity/recall` ranking metadata now explicitly surfaces freshness, provenance quality, and supersession posture contributions in deterministic ordering output. +- `apps/api` now also ships Phase 6 Sprint 24 trust/evidence seams: + - `GET /v0/memories/trust-dashboard` aggregates canonical memory-quality gate posture, queue posture/aging summary, retrieval-quality summary, correction recurrence/freshness drift summary, and deterministic recommended next review action. + - `python3 scripts/run_phase6_quality_evidence.py` writes deterministic quality evidence at `artifacts/release/phase6_quality_evidence.json`. + - Phase 4 reporting scripts now include additive quality evidence summary sections without changing GO/NO_GO pass/fail semantics. 
+- `apps/api` now also ships Phase 5 continuity capture/retrieval/review seams: + - capture backbone endpoints from Sprint 17: + - `POST /v0/continuity/captures` + - `GET /v0/continuity/captures` + - `GET /v0/continuity/captures/{capture_event_id}` + - Sprint 18 retrieval/resumption endpoints: + - `GET /v0/continuity/recall` + - `GET /v0/continuity/resumption-brief` + - Sprint 19 review/correction endpoints: + - `GET /v0/continuity/review-queue` + - `GET /v0/continuity/review-queue/{continuity_object_id}` + - `POST /v0/continuity/review-queue/{continuity_object_id}/corrections` + - Sprint 20 open-loop and review briefing endpoints: + - `GET /v0/continuity/open-loops` + - `GET /v0/continuity/daily-brief` + - `GET /v0/continuity/weekly-review` + - `POST /v0/continuity/open-loops/{continuity_object_id}/review-action` + - recall/resumption responses expose scoped filters, deterministic ordering metadata, confirmation/admission posture, and provenance references. + - correction flows append immutable correction events before lifecycle mutation and expose freshness/supersession metadata (`last_confirmed_at`, `supersedes_object_id`, `superseded_by_object_id`). + - open-loop review actions (`done`, `deferred`, `still_blocked`) are deterministic, auditable, and reflected immediately in continuity resumption output. + - weekly review rollup includes deterministic correction/freshness evidence metrics: `correction_recurrence_count` and `freshness_drift_count`. +- `apps/web` is shipped operator UI, not scaffold-only. The shell includes `/`, `/chat`, `/approvals`, `/tasks`, `/artifacts`, `/gmail`, `/calendar`, `/memories`, `/chief-of-staff`, `/entities`, and `/traces`, with live-backend reads when configured and explicit fixture fallback otherwise. 
+- `/chief-of-staff` now ships P7-S27 preparation/resumption supervision on top of P7-S25/P7-S26: deterministic priority dashboard, deterministic follow-through panel, and deterministic preparation panel with what-changed, checklist, talking points, and resumption supervision artifacts. +- `/chief-of-staff` now also ships P7-S28 weekly review and outcome-learning supervision: deterministic weekly review guidance, explicit recommendation outcome-capture controls, and visible priority-learning/pattern-drift summaries. +- `/chief-of-staff` now also ships P8-S29 action handoff supervision: explicit approval-bounded execution posture, deterministic handoff brief, and visible task/approval draft artifacts with provenance-backed rationale. +- `/chief-of-staff` now also ships P8-S30 queue and operational review supervision: deterministic grouped handoff queue posture, explicit stale/expired visibility, and operator lifecycle review controls with auditable review-action history. +- `/chief-of-staff` now also ships P8-S31 governed execution routing supervision: explicit execution-readiness posture, route controls for governed draft targets, and auditable routing transition history. +- `/chief-of-staff` now also ships P8-S32 outcome-learning and closure-quality supervision: explicit routed-handoff outcome capture controls, deterministic closure/conversion signal visibility, and stale/ignored escalation posture visibility. +- `/memories` is aligned to Phase 6 Sprint 21 canonical memory-quality semantics: + - quality gate posture is consumed from API-backed `GET /v0/memories/quality-gate` contract via the web API layer. + - queue review mode can be selected explicitly (`oldest_first`, `recent_first`, `high_risk_first`, `stale_truth_first`) without breaking existing `submit` / `submit_and_next` labeling flow. 
+- `apps/web` now also includes `/continuity` as the Phase 5 continuity workspace with: + - Sprint 17 fast-capture inbox submit/list/detail + - Sprint 18 recall query/results panel with provenance-backed cards + - Sprint 22 recall ranking posture evidence in UI (`freshness`, `provenance`, `supersession`) + - Sprint 18 deterministic resumption-brief panel with required explicit sections + - Sprint 19 review queue and correction form with correction history and supersession chain visibility + - Sprint 20 open-loop dashboard with grouped posture sections and review-action controls + - Sprint 20 deterministic daily brief and weekly review panels with explicit empty states +- `/chat` supports both assistant and governed-request modes, selected-thread continuity, compact thread creation, deterministic resumption brief review, thread-linked governed workflow review, ordered task-step timeline review, bounded explain-why trace embedding, manual explicit-signal capture controls for selected `message.user` events, and bounded supporting continuity review. +- `/gmail` and `/calendar` are shipped bounded connector workspaces for account review, selected-account detail, explicit account connection, and explicit single-item ingestion into one selected task workspace. +- `/artifacts`, `/memories`, and `/entities` are shipped bounded operator review workspaces for artifact, memory, and entity evidence. +- `workers` includes bounded task-run ticking and approved proxy execution progression under the workflow-style durable run model. + +## Quick Start + +1. Create a local env file: `cp .env.example .env` +2. Start infrastructure: `docker compose up -d` +3. Create a virtualenv and install dependencies: `python3 -m venv .venv && ./.venv/bin/python -m pip install -e '.[dev]'` +4. Apply migrations: `./scripts/migrate.sh` +5. 
Start the API: `./scripts/api_dev.sh` + +Useful checks: + +- Canonical gate entrypoints: `scripts/run_phase4_*.py` are the control-plane canonical MVP release gates; `scripts/run_phase3_*.py`, `scripts/run_phase2_*.py`, and `scripts/run_mvp_*.py` remain compatibility entrypoints with identical semantics. +- Phase 6 quality evidence command: `python3 scripts/run_phase6_quality_evidence.py` (writes deterministic trust-dashboard evidence artifact for release/readiness reporting). +- Phase 4 MVP qualification command: `python3 scripts/run_phase4_mvp_qualification.py` (runs RC rehearsal -> RC archive verify -> MVP exit manifest generation -> MVP exit manifest verify; writes `artifacts/release/phase4_mvp_signoff_record.json`) +- Phase 4 MVP sign-off verifier: `python3 scripts/verify_phase4_mvp_signoff_record.py` (validates sign-off schema, required references, and GO/NO_GO consistency) +- Phase 4 RC rehearsal command: `python3 scripts/run_phase4_release_candidate.py` (writes latest summary `artifacts/release/phase4_rc_summary.json` and appends archive evidence in `artifacts/release/archive/`) +- RC archive hardening contract: index updates are serialized by `artifacts/release/archive/index.lock`; lock timeout exits with code `2` and explicit failure message +- Phase 4 RC archive verifier: `python3 scripts/verify_phase4_rc_archive.py` (validates `artifacts/release/archive/index.json` against retained archive artifacts) +- Phase 4 MVP exit manifest generator: `python3 scripts/generate_phase4_mvp_exit_manifest.py` (writes deterministic closeout artifact `artifacts/release/phase4_mvp_exit_manifest.json` from latest GO RC archive evidence) +- Phase 4 MVP exit manifest verifier: `python3 scripts/verify_phase4_mvp_exit_manifest.py` (validates manifest schema, required fields, and source archive/index references) +- Phase 4 entrypoints: `python3 scripts/run_phase4_acceptance.py`, `python3 scripts/run_phase4_readiness_gates.py`, `python3 scripts/run_phase4_validation_matrix.py` 
+- API health: [http://127.0.0.1:8000/healthz](http://127.0.0.1:8000/healthz) +- Phase 3 acceptance gate: `python3 scripts/run_phase3_acceptance.py` +- Phase 3 readiness gates: `python3 scripts/run_phase3_readiness_gates.py` +- Phase 3 default go/no-go validation gate: `python3 scripts/run_phase3_validation_matrix.py` +- Phase 2 compatibility validation gate: `python3 scripts/run_phase2_validation_matrix.py` +- MVP alias gates (identical semantics): `python3 scripts/run_mvp_acceptance.py`, `python3 scripts/run_mvp_readiness_gates.py`, `python3 scripts/run_mvp_validation_matrix.py` +- Backend tests: `./.venv/bin/python -m pytest tests/unit tests/integration` +- Web tests: `pnpm --dir apps/web test` +- Web shell: `pnpm --dir apps/web dev` + +## Repo Map + +- [PRODUCT_BRIEF.md](PRODUCT_BRIEF.md): stable product scope and release-readiness anchors +- [ARCHITECTURE.md](ARCHITECTURE.md): implemented technical boundaries +- [ROADMAP.md](ROADMAP.md): forward planning from the current repo state +- [RULES.md](RULES.md): durable engineering and scope rules +- [.ai/handoff/CURRENT_STATE.md](.ai/handoff/CURRENT_STATE.md): compact recovery snapshot +- [BUILD_REPORT.md](BUILD_REPORT.md): current sprint build report +- [REVIEW_REPORT.md](REVIEW_REPORT.md): current sprint review report +- [docs/phase8-product-spec.md](docs/phase8-product-spec.md): Phase 8 product scope and constraints +- [docs/phase8-sprint-29-32-plan.md](docs/phase8-sprint-29-32-plan.md): Phase 8 sprint sequencing +- [docs/archive/sprints](docs/archive/sprints): accepted historical sprint build and review artifacts + +## Environment Notes + +- Postgres is the system of record. +- Local Docker Compose includes Postgres with `pgvector`, Redis, and MinIO. +- Helper scripts source the repo-root `.env` and prefer `.venv/bin/python` when present. +- `TASK_WORKSPACE_ROOT` defaults to `/tmp/alicebot/task-workspaces`. 
+- `/healthz` performs a live Postgres check; Redis and MinIO are reported as configured but not live-checked. diff --git a/docs/archive/planning/2026-04-07-phase9-bootstrap/ROADMAP.md b/docs/archive/planning/2026-04-07-phase9-bootstrap/ROADMAP.md new file mode 100644 index 0000000..5670a38 --- /dev/null +++ b/docs/archive/planning/2026-04-07-phase9-bootstrap/ROADMAP.md @@ -0,0 +1,141 @@ +# Roadmap + +## Current Position + +- The canonical repo baseline remains through Phase 3 Sprint 9. +- Earlier Phase 4 increments are delivered on top of that baseline: run-aware execution linkage, idempotent replay controls, approval-to-run pause/resume linkage, explicit run transitions/stop reasons, bounded retry posture, explicit failure classes, and deterministic gate runners. +- Phase 4 Sprint 14 is the active release-control layer. +- Phase 4 Sprint 14 established the release-control layer: Phase 4 release-control is complete and remains the gate baseline, Phase 4 owns acceptance/readiness/validation semantics directly, canonical magnesium reorder ship-gate evidence is first-class, and compatibility gates stay green. +- Phase 4 Sprint 15 adds deterministic MVP release-candidate rehearsal orchestration via `python3 scripts/run_phase4_release_candidate.py`, producing `artifacts/release/phase4_rc_summary.json` with explicit per-step evidence and final GO/NO_GO. +- Phase 4 Sprint 16 adds durable RC evidence retention: each rehearsal run now writes an archive copy plus append-only audit ledger at `artifacts/release/archive/index.json`, while preserving the latest-summary compatibility path. +- Phase 4 Sprint 17 hardens RC archive/index writes for concurrency: deterministic lock path (`artifacts/release/archive/index.lock`), bounded lock-timeout contract, and atomic index replace persistence. 
+- Phase 4 Sprint 18 adds formal closeout evidence tooling: deterministic MVP exit manifest generation (`python3 scripts/generate_phase4_mvp_exit_manifest.py`) and manifest verification (`python3 scripts/verify_phase4_mvp_exit_manifest.py`) from latest GO RC archive evidence. +- Phase 4 Sprint 19 adds deterministic MVP qualification orchestration (`python3 scripts/run_phase4_mvp_qualification.py`) and formal sign-off record verification (`python3 scripts/verify_phase4_mvp_signoff_record.py`) with explicit GO/NO_GO blocker registry. +- Phase 5 Sprint 17 adds the typed continuity capture backbone: immutable `continuity_capture_events`, typed `continuity_objects`, conservative admission posture (`DERIVED`/`TRIAGE`), and the fast capture inbox UI/API surface at `/continuity`. +- Phase 5 Sprint 18 adds provenance-backed recall and deterministic continuity resumption surfaces: `GET /v0/continuity/recall`, `GET /v0/continuity/resumption-brief`, and `/continuity` recall/resumption panels with always-present required sections. +- Phase 5 Sprint 19 adds continuity review/correction and freshness posture: `GET /v0/continuity/review-queue`, `GET /v0/continuity/review-queue/{continuity_object_id}`, `POST /v0/continuity/review-queue/{continuity_object_id}/corrections`, append-only correction events, and immediate recall/resumption correction impact with supersession-chain visibility. +- Phase 5 Sprint 20 adds deterministic open-loop/daily/weekly executive-function seams: `GET /v0/continuity/open-loops`, `GET /v0/continuity/daily-brief`, `GET /v0/continuity/weekly-review`, `POST /v0/continuity/open-loops/{continuity_object_id}/review-action`, grouped posture ordering (`waiting_for`, `blocker`, `stale`, `next_action`), and immediate resumption refresh after `done`/`deferred`/`still_blocked` actions. 
+- Phase 6 Sprint 21 adds canonical memory-quality gate and deterministic review prioritization seams: `GET /v0/memories/quality-gate` with canonical statuses (`healthy`, `needs_review`, `insufficient_sample`, `degraded`), deterministic queue ordering modes (`oldest_first`, `recent_first`, `high_risk_first`, `stale_truth_first`) on `GET /v0/memories/review-queue`, and `/memories` UI alignment to API-backed quality-gate semantics plus priority-mode selection. +- Phase 6 Sprint 22 adds retrieval-quality evaluation and recall ranking calibration seams: deterministic fixture-backed precision reporting via `GET /v0/continuity/retrieval-evaluation`, calibrated recall ordering that favors confirmed/fresher/current truth over stale/superseded alternatives, and explicit ordering posture evidence (`freshness`, `provenance`, `supersession`) in continuity recall API/UI surfaces. +- Phase 6 Sprint 23 adds correction-impact and freshness-hygiene reliability seams, including deterministic weekly review evidence fields for correction recurrence and freshness drift (`correction_recurrence_count`, `freshness_drift_count`) while preserving P6-S21/P6-S22 contracts. +- Phase 6 Sprint 24 adds trust dashboard and quality release evidence seams: `GET /v0/memories/trust-dashboard`, deterministic evidence generation via `python3 scripts/run_phase6_quality_evidence.py`, and additive quality evidence summary integration in Phase 4 readiness/release/validation reporting paths without changing GO/NO_GO semantics. +- Phase 6 is complete (`P6-S21` through `P6-S24` shipped). +- Phase 7 Sprint 25 adds deterministic chief-of-staff priority seams: + - `GET /v0/chief-of-staff` for ranked priorities with explicit posture labels, provenance-backed rationale, trust-aware confidence posture, and deterministic recommended next action. + - `/chief-of-staff` web dashboard for current priorities, rationale visibility, and explicit low-trust confidence downgrade rendering. 
+- Phase 7 Sprint 26 adds deterministic follow-through supervision seams on top of shipped P7-S25: + - `GET /v0/chief-of-staff` now includes deterministic `overdue_items`, `stale_waiting_for_items`, `slipped_commitments`, `escalation_posture`, and governed `draft_follow_up` artifact fields. + - follow-through recommendation actions are deterministic and explicit (`nudge`, `defer`, `escalate`, `close_loop_candidate`) with rationale per item. + - draft follow-ups remain approval-bounded artifacts only (`draft_only`, no autonomous external send). + - `/chief-of-staff` web now renders a dedicated follow-through supervision panel alongside the priority panel. +- Phase 7 Sprint 27 adds deterministic preparation and resumption supervision seams on top of shipped P7-S25/P7-S26: + - `GET /v0/chief-of-staff` now also includes deterministic `preparation_brief`, `what_changed_summary`, `prep_checklist`, `suggested_talking_points`, and `resumption_supervision`. + - preparation and resumption artifacts are provenance-backed and explicitly trust-calibrated. + - low-trust memory posture explicitly downgrades preparation/resumption recommendation confidence. + - `/chief-of-staff` web now renders a dedicated preparation panel with rationale and provenance visibility. +- Phase 7 Sprint 28 adds deterministic weekly review and recommendation outcome-learning seams on top of shipped P7-S25/P7-S26/P7-S27: + - `GET /v0/chief-of-staff` now also includes `weekly_review_brief`, `recommendation_outcomes`, `priority_learning_summary`, and `pattern_drift_summary`. + - `POST /v0/chief-of-staff/recommendation-outcomes` captures explicit recommendation handling outcomes (`accept`, `defer`, `ignore`, `rewrite`) as auditable continuity records. + - weekly review guidance now explicitly ranks close/defer/escalate actions with deterministic rationale. + - `/chief-of-staff` web now renders a weekly review and learning panel with outcome-capture controls and drift visibility. 
+- Phase 8 Sprint 29 adds deterministic chief-of-staff action handoff artifacts on top of shipped P7 outputs: + - `GET /v0/chief-of-staff` now also includes `action_handoff_brief`, `handoff_items`, `task_draft`, `approval_draft`, and explicit `execution_posture`. + - handoff mapping deterministically selects top recommendations from priority/follow-through/preparation/weekly-review signals and emits governed task/approval draft structures with rationale + provenance. + - execution posture is explicit and non-autonomous (`approval_bounded_artifact_only`, `approval_required=true`, no autonomous side effects). + - `/chief-of-staff` web now renders an action handoff panel showing posture, primary task/approval drafts, and per-item rationale/provenance. +- Phase 8 Sprint 30 adds deterministic chief-of-staff handoff queue and operational review seams on top of shipped P8-S29: + - `GET /v0/chief-of-staff` now also includes `handoff_queue_summary`, `handoff_queue_groups`, and `handoff_review_actions`. + - queue lifecycle posture is explicit (`ready`, `pending_approval`, `executed`, `stale`, `expired`) with deterministic grouped ordering and stale/expired visibility. + - `POST /v0/chief-of-staff/handoff-review-actions` captures explicit operator lifecycle transitions as auditable continuity records. + - `/chief-of-staff` web now renders a grouped handoff queue panel with explicit review-action controls and review-action history. +- Phase 8 Sprint 31 adds deterministic governed execution routing seams on top of shipped P8-S29/P8-S30: + - `GET /v0/chief-of-staff` now also includes `execution_routing_summary`, `routed_handoff_items`, `routing_audit_trail`, and `execution_readiness_posture`. + - `POST /v0/chief-of-staff/execution-routing-actions` captures explicit routing transitions into governed draft targets (`task_workflow_draft`, `approval_workflow_draft`, `follow_up_draft_only`). 
+ - routing transitions are explicit and auditable (`routed`, `reaffirmed`) while preserving approval-required, draft-only non-autonomous execution posture. + - `/chief-of-staff` web now renders an execution routing panel with readiness posture visibility, route controls, and transition history. +- Phase 8 Sprint 32 adds deterministic outcome-learning and closure-quality seams on top of shipped P8-S29/P8-S30/P8-S31: + - `GET /v0/chief-of-staff` now also includes `handoff_outcome_summary`, `handoff_outcomes`, `closure_quality_summary`, `conversion_signal_summary`, and `stale_ignored_escalation_posture`. + - `POST /v0/chief-of-staff/handoff-outcomes` captures explicit routed-handoff outcomes (`reviewed`, `approved`, `rejected`, `rewritten`, `executed`, `ignored`, `expired`) as immutable continuity records. + - latest-state deterministic rollups now expose closure quality posture, recommendation-to-execution/closure conversion rates, and stale+ignored escalation posture signals. + - `/chief-of-staff` web now renders an outcome-learning panel with explicit outcome-capture controls and closure/conversion/escalation visibility. +- Phase 7 is complete (`P7-S25` through `P7-S28` shipped). +- Phase 8 Sprint 29 is shipped baseline. +- Phase 8 Sprint 30 is shipped baseline. +- Phase 8 Sprint 31 is shipped baseline. +- Active post-Phase-7 sprint is Phase 8 Sprint 32: outcome learning and closure quality. 
+- Phase 8 planning anchors are: + - `docs/phase8-product-spec.md` + - `docs/phase8-sprint-29-32-plan.md` +- The backend baseline now includes continuity APIs, deterministic context compilation, governed request routing, approvals and execution review, typed memory and open-loop seams, deterministic thread resumption brief reads, unified explicit-signal capture seams, explicit task and task-step lifecycle seams, rooted local workspaces and artifact ingestion, artifact retrieval and embeddings, narrow read-only Gmail and Calendar seams with selected-item ingestion, bounded read-only Calendar event discovery for one connected account, and the no-tools assistant-response seam. +- The frontend baseline is now real product surface, not scaffolding: the Next.js operator shell ships `/`, `/chat`, `/approvals`, `/tasks`, `/artifacts`, `/gmail`, `/calendar`, `/memories`, `/chief-of-staff`, `/entities`, and `/traces`, with live-backend reads when configured and explicit fixture fallback when they are not. +- `/chat` now uses selected-thread continuity instead of a raw thread-id-first flow, keeps bounded thread review and deterministic resumption brief review visible beside both assistant and governed-request composition, ships thread-linked governed workflow, ordered task-step timeline review, and bounded explain-why trace embedding, and includes manual explicit-signal capture controls for selected `message.user` events. +- `/gmail` and `/calendar` are shipped bounded connector workspaces in the shell: account review, selected-account detail, explicit connect, and one selected-item ingestion path into one chosen task workspace. The API baseline also includes bounded Calendar event discovery for one connected account with deterministic ordering and bounded limits. +- `/memories`, `/entities`, and `/artifacts` are shipped bounded review workspaces in the shell, not planned surface. 
+- Gate ownership is canonicalized to Phase 4 runner scripts (`scripts/run_phase4_*.py`), while `python3 scripts/run_phase3_validation_matrix.py`, `python3 scripts/run_phase2_validation_matrix.py`, and `python3 scripts/run_mvp_validation_matrix.py` remain compatibility guarantees. +- Historical sprint detail belongs in build and review artifacts, not in this roadmap. + +## Next Delivery Focus + +### Build From The Shipped API Plus Web-Shell Baseline + +- Plan the next sprint from the implemented Phase 3 Sprint 9 backend-plus-web baseline, not from older pre-Phase-3 narratives. +- Treat transcript continuity, thread-linked workflow review, task-step timeline review, bounded explain-why embedding in `/chat`, deterministic resumption brief review, manual explicit-signal capture controls, the shipped review workspaces (`/memories`, `/entities`, `/artifacts`), the shipped connector workspaces (`/gmail`, `/calendar`), and bounded Calendar event discovery as baseline, not pending work. +- Treat `python3 scripts/run_phase4_release_candidate.py` as the canonical MVP release-candidate rehearsal command and evidence contract (latest summary + append-only archive ledger). +- Treat `python3 scripts/verify_phase4_rc_archive.py` as the canonical archive audit verification command. +- Treat `python3 scripts/generate_phase4_mvp_exit_manifest.py` and `python3 scripts/verify_phase4_mvp_exit_manifest.py` as required Phase 4 closeout commands for deterministic MVP phase-exit evidence. +- Treat `python3 scripts/run_phase4_mvp_qualification.py` and `python3 scripts/verify_phase4_mvp_signoff_record.py` as the canonical Sprint 19 MVP qualification/sign-off commands. +- Treat archive index hardening as baseline behavior (deterministic lock and atomic index replace), not optional operational guidance. 
+- Treat the deterministic validation matrix command (`python3 scripts/run_phase4_validation_matrix.py`) as the canonical Phase 4 validation step inside the RC rehearsal chain, while keeping Phase 3/Phase 2/MVP validation commands as compatibility checks. +- Treat Phase 5 Sprint 17 through Sprint 20 continuity surfaces as shipped baseline: + - `/v0/continuity/captures*` + - `/v0/continuity/recall` + - `/v0/continuity/resumption-brief` + - `/v0/continuity/review-queue*` + - `/v0/continuity/open-loops` + - `/v0/continuity/daily-brief` + - `/v0/continuity/weekly-review` + - `/v0/continuity/open-loops/{continuity_object_id}/review-action` + - `/continuity` capture/recall/resumption/review/open-loop/daily/weekly workspace +- Do not relitigate continuity backbone, recall ordering contracts, correction-event append semantics, open-loop posture contracts, or required resumption/brief section contracts. +- Favor one narrow seam that deepens operator use of already shipped contracts before widening connector breadth or orchestration scope. +- Reuse the existing continuity, response, approval, task, workspace-artifact, memory, entity, execution, and trace surfaces instead of introducing parallel contracts. + +### Keep New Scope Narrow + +- Do not bundle broader Gmail or Calendar breadth, auth expansion, richer document parsing, runner orchestration, or proxy breadth into the next sprint by default. +- Do not reopen schema or API design unless the next sprint explicitly requires it. 
+- Keep Phase 5 follow-up scope explicit: + - no additional Sprint 17-20 continuity scope remains + - P6-S21 memory-quality gate alignment and deterministic review prioritization is now shipped baseline + - P6-S22 retrieval-quality calibration is now shipped baseline + - P6-S23 correction impact and freshness hygiene is now shipped baseline + - P6-S24 trust dashboard and quality release evidence is now shipped baseline + - P7-S25 priority engine and chief-of-staff dashboard is now shipped baseline + - P7-S26 follow-through supervision is now shipped baseline + - P7-S27 preparation briefs and resumption supervision is now shipped baseline + - P7-S28 weekly review and outcome learning is now shipped baseline + - P8-S29 action handoff artifacts are now shipped baseline + - P8-S30 handoff queue and operational review is now shipped baseline + - P8-S31 governed execution routing is now shipped baseline + - active post-Phase-7 packet is P8-S32 outcome learning and closure quality + - do not reopen P6-S21/P6-S22/P6-S23/P6-S24 contracts while operating on post-P6-S24 follow-up scope + - do not reopen P7-S25/P7-S26/P7-S27/P7-S28 semantics while executing P8-S32 + - do not reopen P8-S29 handoff-generation semantics while executing P8-S32 + - do not reopen P8-S30 queue/review semantics while executing P8-S32 + - do not reopen P8-S31 routing semantics while executing P8-S32 + - do not fold post-Phase-5 work back into shipped Sprint 20 seams +- Keep live docs synchronized with shipped reality so planning does not drift behind the repo again. + +## Ongoing Risks + +- Memory extraction and retrieval quality remain the main product risk. +- Auth remains incomplete beyond the current database user-context model. +- The operator shell is now shipped surface, including `/gmail` and `/calendar`, so future drift between web UI behavior, backend seams, and canonical docs is a planning and review risk. 
+- Connector and document boundaries are still intentionally narrow; broadening them safely will require separate explicit sprints. + +## Deferred Until Explicitly Opened + +- Gmail search, mailbox sync, attachment ingestion, write-capable Gmail actions, and broader Calendar capabilities such as recurrence expansion, sync, and write actions +- runner-style orchestration and automatic multi-step progression +- richer document parsing, OCR, and layout-aware ingestion +- broader tool execution breadth beyond the current governed `proxy.echo` seam diff --git a/docs/archive/planning/2026-04-07-phase9-bootstrap/RULES.md b/docs/archive/planning/2026-04-07-phase9-bootstrap/RULES.md new file mode 100644 index 0000000..310fccd --- /dev/null +++ b/docs/archive/planning/2026-04-07-phase9-bootstrap/RULES.md @@ -0,0 +1,41 @@ +# Rules + +## Truth And Scope + +- The active sprint packet is the top scope boundary for implementation work. +- Treat `.ai/active/SPRINT_PACKET.md` as an input/control artifact: do not edit it during implementation unless Control Tower explicitly changes the sprint. +- Never describe planned behavior as already implemented. +- Keep canonical truth files concise, current, and durable. +- Shared runbooks and canonical docs must use machine-independent commands and links; do not use local user-home absolute paths. +- When a sprint changes the operating baseline, update canonical truth docs in the same sprint before handoff. +- Archive stale planning or history material instead of deleting it when traceability still matters. +- Do not widen product scope without an explicit roadmap or sprint change. + +## Product And Safety + +- Never execute a consequential external action without explicit user approval. +- Treat explainability as a product feature, not an internal debugging aid. +- Treat the repeat magnesium reorder as the v1 release-readiness validation scenario. 
+- Do not add proactive automation, write-capable connectors, voice, or browser automation without an explicit roadmap change. + +## Architecture And Data + +- Treat the immutable event store as ground truth; downstream memories, tasks, and summaries are derived or governed views. +- Always compile context per invocation from durable sources. +- Keep prompt assembly, tool schemas, and serialized context ordering deterministic. +- Treat Postgres as the v1 system of record unless measured constraints justify a change. +- Task-step lineage and execution linkage must stay explicit; do not reconstruct them heuristically from broader task history. +- Enforce row-level security on every user-owned table. +- Connector secrets must not be stored on normal metadata tables or exposed on read surfaces; they must use a dedicated protected storage seam. +- Default memory admission to `NOOP`; promote only evidence-backed changes and preserve revision history for non-`NOOP` updates. +- Apply domain and sensitivity filters before semantic retrieval. + +## Delivery And Testing + +- Build against typed contracts and migration-backed schemas first. +- Keep changes small, module-scoped, and test-backed. +- Never bypass policy, approval, or proxy boundaries to introduce side effects. +- Schema changes are not complete without forward and rollback coverage. +- Every module needs unit tests and at least one integration boundary test. +- Approval boundaries, row-level security, audit logging, and lineage changes require adversarial tests. +- Do not make memory-quality or retrieval-quality release claims without labeled evaluation evidence. 
diff --git a/docs/phase9-sprint-33-38-plan.md b/docs/phase9-sprint-33-38-plan.md
index c2230cd..8d7f91f 100644
--- a/docs/phase9-sprint-33-38-plan.md
+++ b/docs/phase9-sprint-33-38-plan.md
@@ -10,7 +10,7 @@ Sprint IDs in this document are Phase 9-local (`P9-S33` to `P9-S38`) to avoid am
 
 Ship Alice as a public memory and continuity engine that technical users can install and connect to external agents quickly.
 
-## Sprint 33 (P9-S33)
+## Sprint 33 (P9-S33) (shipped baseline)
 
 ### Title
 
@@ -54,7 +54,7 @@ Turn the current internal system into a public-safe, installable core with one d
 - MCP interoperability
 - broad importer set
 
-## Sprint 34 (P9-S34)
+## Sprint 34 (P9-S34) (shipped baseline)
 
 ### Title
 
@@ -96,7 +96,7 @@ Make Alice useful without requiring the internal operator shell.
 - MCP server
 - external integrations
 
-## Sprint 35 (P9-S35)
+## Sprint 35 (P9-S35) (shipped baseline)
 
 ### Title
 
@@ -141,7 +141,7 @@ Make Alice usable immediately by external assistants through a stable small tool
 - broad tool surface expansion
 - remote hosted auth systems
 
-## Sprint 36 (P9-S36)
+## Sprint 36 (P9-S36) (current delivery)
 
 ### Title
 
diff --git a/fixtures/openclaw/workspace_v1.json b/fixtures/openclaw/workspace_v1.json
new file mode 100644
index 0000000..0e0c8af
--- /dev/null
+++ b/fixtures/openclaw/workspace_v1.json
@@ -0,0 +1,73 @@
+{
+  "fixture_id": "openclaw-s36-workspace-v1",
+  "workspace": {
+    "id": "openclaw-workspace-demo-001",
+    "name": "OpenClaw Interop Demo"
+  },
+  "durable_memory": [
+    {
+      "id": "oc-memory-001",
+      "type": "decision",
+      "status": "active",
+      "content": "Keep MCP tool surface narrow during Phase 9 interop rollout.",
+      "thread_id": "cccccccc-cccc-4ccc-8ccc-cccccccccccc",
+      "task_id": "dddddddd-dddd-4ddd-8ddd-dddddddddddd",
+      "project": "Alice Public Core",
+      "person": "Interop Owner",
+      "source_event_ids": [
+        "openclaw-event-0001"
+      ],
+      "confirmation_status": "confirmed",
+      "confidence": 0.97,
+      "tags": [
+        "release",
+        "interop"
+      ]
+    },
+    {
+      "id": "oc-memory-002",
+      "type": "next_action",
+      "status": "active",
+      "content": "Run OpenClaw fixture import before CLI recall and resume checks.",
+      "thread_id": "cccccccc-cccc-4ccc-8ccc-cccccccccccc",
+      "task_id": "dddddddd-dddd-4ddd-8ddd-dddddddddddd",
+      "project": "Alice Public Core",
+      "person": "Build Engineer",
+      "source_event_ids": [
+        "openclaw-event-0002"
+      ],
+      "confirmation_status": "confirmed",
+      "confidence": 0.93
+    },
+    {
+      "id": "oc-memory-003",
+      "type": "waiting_for",
+      "status": "active",
+      "content": "Wait for reviewer PASS after OpenClaw adapter verification.",
+      "thread_id": "cccccccc-cccc-4ccc-8ccc-cccccccccccc",
+      "task_id": "dddddddd-dddd-4ddd-8ddd-dddddddddddd",
+      "project": "Alice Public Core",
+      "person": "Control Tower",
+      "source_event_ids": [
+        "openclaw-event-0003"
+      ],
+      "confirmation_status": "unconfirmed",
+      "confidence": 0.9
+    },
+    {
+      "id": "oc-memory-004",
+      "type": "commitment",
+      "status": "completed",
+      "content": "Document the import boundary in ADR-004.",
+      "thread_id": "cccccccc-cccc-4ccc-8ccc-cccccccccccc",
+      "task_id": "dddddddd-dddd-4ddd-8ddd-dddddddddddd",
+      "project": "Alice Public Core",
+      "person": "Docs Owner",
+      "source_event_ids": [
+        "openclaw-event-0004"
+      ],
+      "confirmation_status": "confirmed",
+      "confidence": 0.91
+    }
+  ]
+}
diff --git a/scripts/load_openclaw_sample_data.py b/scripts/load_openclaw_sample_data.py
new file mode 100755
index 0000000..c977836
--- /dev/null
+++ b/scripts/load_openclaw_sample_data.py
@@ -0,0 +1,102 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import 
argparse +import json +import os +from pathlib import Path +import sys +from uuid import UUID + + +REPO_ROOT = Path(__file__).resolve().parents[1] +API_SRC = REPO_ROOT / "apps" / "api" / "src" +if str(API_SRC) not in sys.path: + sys.path.insert(0, str(API_SRC)) + +from alicebot_api.db import user_connection +from alicebot_api.openclaw_import import import_openclaw_source +from alicebot_api.store import ContinuityStore + + +DEFAULT_DATABASE_URL = "postgresql://alicebot_app:alicebot_app@localhost:5432/alicebot" +DEFAULT_AUTH_USER_ID = "00000000-0000-0000-0000-000000000001" +DEFAULT_SOURCE_PATH = REPO_ROOT / "fixtures" / "openclaw" / "workspace_v1.json" + + +def _parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Import OpenClaw sample workspace data into Alice continuity objects." + ) + parser.add_argument( + "--source", + default=os.getenv("OPENCLAW_SAMPLE_DATA_PATH", str(DEFAULT_SOURCE_PATH)), + help="Path to an OpenClaw workspace/export file or directory.", + ) + parser.add_argument( + "--database-url", + default=os.getenv("DATABASE_URL", DEFAULT_DATABASE_URL), + help="Database URL used for writes.", + ) + parser.add_argument( + "--user-id", + default=os.getenv("ALICEBOT_AUTH_USER_ID", DEFAULT_AUTH_USER_ID), + help="User ID to own imported OpenClaw data.", + ) + parser.add_argument( + "--user-email", + default=os.getenv("ALICEBOT_IMPORT_USER_EMAIL", "openclaw-sample@example.com"), + help="Email for auto-created user when --user-id is not found.", + ) + parser.add_argument( + "--display-name", + default=os.getenv("ALICEBOT_IMPORT_USER_DISPLAY_NAME", "OpenClaw Sample User"), + help="Display name for auto-created user when --user-id is not found.", + ) + return parser.parse_args() + + +def _ensure_user(store: ContinuityStore, *, user_id: UUID, email: str, display_name: str) -> None: + with store.conn.cursor() as cur: + cur.execute("SELECT 1 FROM users WHERE id = %s", (user_id,)) + exists = cur.fetchone() is not None + if exists: + 
return + store.create_user(user_id, email, display_name) + + +def main() -> int: + args = _parse_args() + source_path = Path(args.source).expanduser().resolve() + user_id = UUID(str(args.user_id)) + + with user_connection(args.database_url, user_id) as conn: + store = ContinuityStore(conn) + _ensure_user( + store, + user_id=user_id, + email=str(args.user_email), + display_name=str(args.display_name), + ) + summary = import_openclaw_source( + store, + user_id=user_id, + source=source_path, + ) + + print( + json.dumps( + { + **summary, + "user_id": str(user_id), + "source_path": str(source_path), + }, + indent=2, + sort_keys=True, + ) + ) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/load_openclaw_sample_data.sh b/scripts/load_openclaw_sample_data.sh new file mode 100755 index 0000000..02a9cf9 --- /dev/null +++ b/scripts/load_openclaw_sample_data.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd -- "${SCRIPT_DIR}/.." && pwd)" + +if [ -f "${REPO_ROOT}/.env" ]; then + set -a + . 
"${REPO_ROOT}/.env" + set +a +fi + +PYTHON_BIN="python3" +if [ -x "${REPO_ROOT}/.venv/bin/python" ]; then + PYTHON_BIN="${REPO_ROOT}/.venv/bin/python" +fi + +cd "${REPO_ROOT}" + +exec "${PYTHON_BIN}" "${REPO_ROOT}/scripts/load_openclaw_sample_data.py" "$@" diff --git a/tests/integration/test_openclaw_import.py b/tests/integration/test_openclaw_import.py new file mode 100644 index 0000000..243430f --- /dev/null +++ b/tests/integration/test_openclaw_import.py @@ -0,0 +1,88 @@ +from __future__ import annotations + +from pathlib import Path +from uuid import UUID, uuid4 + +from alicebot_api.continuity_recall import query_continuity_recall +from alicebot_api.continuity_resumption import compile_continuity_resumption_brief +from alicebot_api.contracts import ContinuityRecallQueryInput, ContinuityResumptionBriefRequestInput +from alicebot_api.db import user_connection +from alicebot_api.openclaw_import import import_openclaw_source +from alicebot_api.store import ContinuityStore + + +REPO_ROOT = Path(__file__).resolve().parents[2] +OPENCLAW_FIXTURE_PATH = REPO_ROOT / "fixtures" / "openclaw" / "workspace_v1.json" +THREAD_ID = UUID("cccccccc-cccc-4ccc-8ccc-cccccccccccc") + + +def seed_user(database_url: str, *, email: str) -> UUID: + user_id = uuid4() + with user_connection(database_url, user_id) as conn: + ContinuityStore(conn).create_user(user_id, email, email.split("@", 1)[0].title()) + return user_id + + +def test_openclaw_import_supports_recall_resumption_and_idempotent_dedupe(migrated_database_urls) -> None: + user_id = seed_user(migrated_database_urls["app"], email="openclaw-import@example.com") + + with user_connection(migrated_database_urls["app"], user_id) as conn: + store = ContinuityStore(conn) + + first_import = import_openclaw_source( + store, + user_id=user_id, + source=OPENCLAW_FIXTURE_PATH, + ) + + assert first_import["status"] == "ok" + assert first_import["fixture_id"] == "openclaw-s36-workspace-v1" + assert first_import["workspace_id"] == 
"openclaw-workspace-demo-001" + assert first_import["total_candidates"] == 5 + assert first_import["imported_count"] == 4 + assert first_import["skipped_duplicates"] == 1 + assert first_import["provenance_source_kind"] == "openclaw_import" + + recall = query_continuity_recall( + store, + user_id=user_id, + request=ContinuityRecallQueryInput( + thread_id=THREAD_ID, + project="Alice Public Core", + limit=20, + ), + ) + + assert recall["summary"]["returned_count"] == 4 + assert all(item["provenance"]["source_kind"] == "openclaw_import" for item in recall["items"]) + assert all( + item["provenance"].get("openclaw_workspace_id") == "openclaw-workspace-demo-001" + for item in recall["items"] + ) + + resumption = compile_continuity_resumption_brief( + store, + user_id=user_id, + request=ContinuityResumptionBriefRequestInput( + thread_id=THREAD_ID, + max_recent_changes=10, + max_open_loops=10, + ), + ) + + brief = resumption["brief"] + assert brief["last_decision"]["item"] is not None + assert brief["last_decision"]["item"]["provenance"]["source_kind"] == "openclaw_import" + assert brief["next_action"]["item"] is not None + assert brief["next_action"]["item"]["provenance"]["source_kind"] == "openclaw_import" + + second_import = import_openclaw_source( + store, + user_id=user_id, + source=OPENCLAW_FIXTURE_PATH, + ) + + assert second_import["status"] == "noop" + assert second_import["total_candidates"] == 5 + assert second_import["imported_count"] == 0 + assert second_import["skipped_duplicates"] == 5 diff --git a/tests/integration/test_openclaw_mcp_integration.py b/tests/integration/test_openclaw_mcp_integration.py new file mode 100644 index 0000000..fee8e42 --- /dev/null +++ b/tests/integration/test_openclaw_mcp_integration.py @@ -0,0 +1,178 @@ +from __future__ import annotations + +import json +import os +from pathlib import Path +import subprocess +import sys +from typing import Any +from uuid import UUID, uuid4 + +from alicebot_api.db import user_connection +from 
alicebot_api.openclaw_import import import_openclaw_source +from alicebot_api.store import ContinuityStore + + +REPO_ROOT = Path(__file__).resolve().parents[2] +OPENCLAW_FIXTURE_PATH = REPO_ROOT / "fixtures" / "openclaw" / "workspace_v1.json" +THREAD_ID = UUID("cccccccc-cccc-4ccc-8ccc-cccccccccccc") + + +def seed_user(database_url: str, *, email: str) -> UUID: + user_id = uuid4() + with user_connection(database_url, user_id) as conn: + ContinuityStore(conn).create_user(user_id, email, email.split("@", 1)[0].title()) + return user_id + + +def build_runtime_env(*, database_url: str, user_id: UUID) -> dict[str, str]: + env = os.environ.copy() + env["DATABASE_URL"] = database_url + env["ALICEBOT_AUTH_USER_ID"] = str(user_id) + pythonpath_entries = [str(REPO_ROOT / "apps" / "api" / "src"), str(REPO_ROOT / "workers")] + existing_pythonpath = env.get("PYTHONPATH") + if existing_pythonpath: + pythonpath_entries.append(existing_pythonpath) + env["PYTHONPATH"] = os.pathsep.join(pythonpath_entries) + return env + + +def _write_mcp_message(stream, payload: dict[str, object]) -> None: + encoded = json.dumps(payload, separators=(",", ":"), sort_keys=True).encode("utf-8") + stream.write(f"Content-Length: {len(encoded)}\r\n\r\n".encode("ascii")) + stream.write(encoded) + stream.flush() + + +def _read_mcp_message(stream) -> dict[str, object]: + headers: dict[str, str] = {} + while True: + line = stream.readline() + if line == b"": + raise RuntimeError("MCP server closed stdout unexpectedly") + if line in {b"\r\n", b"\n"}: + break + decoded = line.decode("utf-8").strip() + key, value = decoded.split(":", 1) + headers[key.strip().lower()] = value.strip() + + content_length = int(headers["content-length"]) + body = stream.read(content_length) + return json.loads(body.decode("utf-8")) + + +class MCPClient: + def __init__(self, process: subprocess.Popen[bytes]) -> None: + self.process = process + self._next_id = 1 + + def request(self, method: str, params: dict[str, object] | None = 
None) -> dict[str, object]: + request_id = self._next_id + self._next_id += 1 + payload: dict[str, object] = {"jsonrpc": "2.0", "id": request_id, "method": method} + if params is not None: + payload["params"] = params + assert self.process.stdin is not None + _write_mcp_message(self.process.stdin, payload) + assert self.process.stdout is not None + response = _read_mcp_message(self.process.stdout) + assert response.get("id") == request_id + return response + + def notify(self, method: str, params: dict[str, object] | None = None) -> None: + payload: dict[str, object] = {"jsonrpc": "2.0", "method": method} + if params is not None: + payload["params"] = params + assert self.process.stdin is not None + _write_mcp_message(self.process.stdin, payload) + + def close(self) -> None: + if self.process.poll() is None: + self.process.terminate() + try: + self.process.wait(timeout=5) + except subprocess.TimeoutExpired: + self.process.kill() + self.process.wait(timeout=5) + + +def start_mcp_client(*, database_url: str, user_id: UUID) -> MCPClient: + env = build_runtime_env(database_url=database_url, user_id=user_id) + process = subprocess.Popen( + [sys.executable, "-m", "alicebot_api.mcp_server"], + cwd=REPO_ROOT, + env=env, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=False, + ) + + client = MCPClient(process=process) + initialize = client.request( + "initialize", + params={ + "protocolVersion": "2024-11-05", + "clientInfo": {"name": "pytest-openclaw-mcp", "version": "1.0"}, + "capabilities": {}, + }, + ) + assert initialize["result"]["protocolVersion"] == "2024-11-05" + client.notify("notifications/initialized", {}) + return client + + +def _call_tool(client: MCPClient, *, name: str, arguments: dict[str, object]) -> dict[str, Any]: + response = client.request("tools/call", params={"name": name, "arguments": arguments}) + assert "error" not in response + result = response["result"] + assert result["isError"] is False + return 
result["structuredContent"] + + +def test_openclaw_imported_data_is_usable_from_shipped_mcp_recall_and_resume_tools( + migrated_database_urls, +) -> None: + user_id = seed_user(migrated_database_urls["app"], email="openclaw-mcp@example.com") + + with user_connection(migrated_database_urls["app"], user_id) as conn: + store = ContinuityStore(conn) + summary = import_openclaw_source( + store, + user_id=user_id, + source=OPENCLAW_FIXTURE_PATH, + ) + assert summary["imported_count"] == 4 + + client = start_mcp_client(database_url=migrated_database_urls["app"], user_id=user_id) + try: + recall_payload = _call_tool( + client, + name="alice_recall", + arguments={ + "thread_id": str(THREAD_ID), + "project": "Alice Public Core", + "query": "MCP tool surface", + "limit": 20, + }, + ) + resume_payload = _call_tool( + client, + name="alice_resume", + arguments={ + "thread_id": str(THREAD_ID), + "max_recent_changes": 10, + "max_open_loops": 10, + }, + ) + finally: + client.close() + + assert recall_payload["summary"]["returned_count"] >= 1 + assert any(item["provenance"]["source_kind"] == "openclaw_import" for item in recall_payload["items"]) + + brief = resume_payload["brief"] + assert brief["last_decision"]["item"] is not None + assert brief["last_decision"]["item"]["provenance"]["source_kind"] == "openclaw_import" + assert brief["next_action"]["item"] is not None + assert brief["next_action"]["item"]["provenance"]["source_kind"] == "openclaw_import" diff --git a/tests/unit/test_openclaw_adapter.py b/tests/unit/test_openclaw_adapter.py new file mode 100644 index 0000000..93f4d65 --- /dev/null +++ b/tests/unit/test_openclaw_adapter.py @@ -0,0 +1,99 @@ +from __future__ import annotations + +import json +from pathlib import Path + +import pytest + +from alicebot_api.openclaw_adapter import OpenClawAdapterValidationError, load_openclaw_payload + + +REPO_ROOT = Path(__file__).resolve().parents[2] +FIXTURE_PATH = REPO_ROOT / "fixtures" / "openclaw" / "workspace_v1.json" + + +def 
test_openclaw_adapter_loads_fixture_with_deterministic_mapping() -> None: + batch = load_openclaw_payload(FIXTURE_PATH) + + assert batch.context.fixture_id == "openclaw-s36-workspace-v1" + assert batch.context.workspace_id == "openclaw-workspace-demo-001" + assert batch.context.workspace_name == "OpenClaw Interop Demo" + assert len(batch.items) == 5 + + first = batch.items[0] + assert first.source_item_id == "oc-memory-001" + assert first.object_type == "Decision" + assert first.status == "active" + assert first.raw_content == "Decision: Keep MCP tool surface narrow during Phase 9 interop rollout." + assert first.title == "Decision: Keep MCP tool surface narrow during Phase 9 interop rollout." + assert first.body["decision_text"] == "Keep MCP tool surface narrow during Phase 9 interop rollout." + assert first.source_provenance["thread_id"] == "cccccccc-cccc-4ccc-8ccc-cccccccccccc" + assert first.source_provenance["task_id"] == "dddddddd-dddd-4ddd-8ddd-dddddddddddd" + assert first.source_provenance["project"] == "Alice Public Core" + assert first.source_provenance["person"] == "Interop Owner" + assert first.source_provenance["source_event_ids"] == ["openclaw-event-0001"] + assert first.confidence == 0.97 + assert len(first.dedupe_key) == 64 + + +def test_openclaw_adapter_emits_stable_dedupe_keys() -> None: + first = load_openclaw_payload(FIXTURE_PATH) + second = load_openclaw_payload(FIXTURE_PATH) + + assert [item.dedupe_key for item in first.items] == [item.dedupe_key for item in second.items] + + +def test_openclaw_adapter_supports_directory_workspace_contract(tmp_path: Path) -> None: + workspace_payload = { + "workspace": { + "id": "oc-ws-dir-1", + "name": "Directory Workspace", + } + } + memory_payload = { + "durable_memory": [ + { + "id": "oc-dir-001", + "type": "next_action", + "content": "Ship directory contract parsing.", + "thread_id": "cccccccc-cccc-4ccc-8ccc-cccccccccccc", + } + ] + } + + (tmp_path / 
"workspace.json").write_text(json.dumps(workspace_payload), encoding="utf-8") + (tmp_path / "durable_memory.json").write_text(json.dumps(memory_payload), encoding="utf-8") + + batch = load_openclaw_payload(tmp_path) + + assert batch.context.workspace_id == "oc-ws-dir-1" + assert batch.context.workspace_name == "Directory Workspace" + assert len(batch.items) == 1 + assert batch.items[0].object_type == "NextAction" + + +def test_openclaw_adapter_rejects_invalid_payload() -> None: + with pytest.raises(OpenClawAdapterValidationError, match="invalid JSON"): + load_openclaw_payload(REPO_ROOT / "pyproject.toml") + + +def test_openclaw_adapter_rejects_unknown_status_value(tmp_path: Path) -> None: + payload = { + "workspace": { + "id": "oc-ws-status-1", + "name": "Status Validation Workspace", + }, + "durable_memory": [ + { + "id": "oc-status-001", + "type": "decision", + "status": "paused", + "content": "Do not silently coerce unknown statuses.", + } + ], + } + source = tmp_path / "workspace.json" + source.write_text(json.dumps(payload), encoding="utf-8") + + with pytest.raises(OpenClawAdapterValidationError, match="status must be one of"): + load_openclaw_payload(source)