From 8515c6202d9b8fb1b21d7e3546cb3424a766e48b Mon Sep 17 00:00:00 2001 From: Sami Rusani Date: Wed, 8 Apr 2026 11:02:12 +0200 Subject: [PATCH] P9-S37: ship importers and eval harness --- .ai/active/SPRINT_PACKET.md | 284 +++++++----- .ai/handoff/CURRENT_STATE.md | 31 +- ARCHITECTURE.md | 20 +- BUILD_REPORT.md | 168 ++++--- README.md | 32 +- REVIEW_REPORT.md | 40 +- ROADMAP.md | 14 +- RULES.md | 2 + apps/api/src/alicebot_api/chatgpt_import.py | 401 +++++++++++++++++ apps/api/src/alicebot_api/importer_models.py | 283 ++++++++++++ .../src/alicebot_api/importers/__init__.py | 6 + apps/api/src/alicebot_api/importers/common.py | 158 +++++++ apps/api/src/alicebot_api/markdown_import.py | 335 ++++++++++++++ apps/api/src/alicebot_api/openclaw_import.py | 145 ++---- .../src/alicebot_api/retrieval_evaluation.py | 426 +++++++++++++++++- apps/web/components/approval-detail.test.tsx | 2 +- .../continuity-open-loops-panel.test.tsx | 6 +- .../workflow-memory-writeback-form.tsx | 6 +- ...5-import-provenance-and-dedupe-strategy.md | 45 ++ ...ADR-007-public-evaluation-harness-scope.md | 48 ++ docs/phase9-sprint-33-38-plan.md | 14 +- eval/baselines/phase9_s37_baseline.json | 207 +++++++++ eval/reports/phase9_eval_latest.json | 207 +++++++++ fixtures/importers/chatgpt/workspace_v1.json | 52 +++ fixtures/importers/markdown/workspace_v1.md | 19 + scripts/load_chatgpt_sample_data.py | 102 +++++ scripts/load_chatgpt_sample_data.sh | 20 + scripts/load_markdown_sample_data.py | 102 +++++ scripts/load_markdown_sample_data.sh | 20 + scripts/run_phase9_eval.py | 155 +++++++ scripts/run_phase9_eval.sh | 20 + tests/__init__.py | 0 tests/integration/__init__.py | 0 tests/integration/test_chatgpt_import.py | 91 ++++ tests/integration/test_markdown_import.py | 91 ++++ tests/integration/test_phase9_eval.py | 59 +++ tests/unit/__init__.py | 0 tests/unit/test_importers.py | 66 +++ tests/unit/test_phase9_eval.py | 33 ++ 39 files changed, 3352 insertions(+), 358 deletions(-) create mode 100644 apps/api/src/alicebot_api/chatgpt_import.py create mode 100644 apps/api/src/alicebot_api/importer_models.py create mode 100644 apps/api/src/alicebot_api/importers/__init__.py create mode 100644 apps/api/src/alicebot_api/importers/common.py create mode 100644 apps/api/src/alicebot_api/markdown_import.py create mode 100644 docs/adr/ADR-005-import-provenance-and-dedupe-strategy.md create mode 100644 docs/adr/ADR-007-public-evaluation-harness-scope.md create mode 100644 eval/baselines/phase9_s37_baseline.json create mode 100644 eval/reports/phase9_eval_latest.json create mode 100644 fixtures/importers/chatgpt/workspace_v1.json create mode 100644 fixtures/importers/markdown/workspace_v1.md create mode 100755 scripts/load_chatgpt_sample_data.py create mode 100755 scripts/load_chatgpt_sample_data.sh create mode 100755 scripts/load_markdown_sample_data.py create mode 100755 scripts/load_markdown_sample_data.sh create mode 100755 scripts/run_phase9_eval.py create mode 100755 scripts/run_phase9_eval.sh create mode 100644 tests/__init__.py create mode 100644 tests/integration/__init__.py create mode 100644 tests/integration/test_chatgpt_import.py create mode 100644 tests/integration/test_markdown_import.py create mode 100644 tests/integration/test_phase9_eval.py create mode 100644 tests/unit/__init__.py create mode 100644 tests/unit/test_importers.py create mode 100644 tests/unit/test_phase9_eval.py diff --git a/.ai/active/SPRINT_PACKET.md b/.ai/active/SPRINT_PACKET.md index ed3351b..c7d7181 100644 --- a/.ai/active/SPRINT_PACKET.md +++ 
b/.ai/active/SPRINT_PACKET.md @@ -2,7 +2,7 @@ ## Sprint Title -Phase 9 Sprint 36 (P9-S36): OpenClaw Adapter +Phase 9 Sprint 37 (P9-S37): Importers and Evaluation Harness ## Sprint Type @@ -10,7 +10,7 @@ feature ## Sprint Reason -`P9-S33` shipped the public-safe `alice-core` boundary and startup path. `P9-S34` shipped the deterministic local CLI contract. `P9-S35` shipped the narrow MCP transport. The next non-redundant seam is proving Alice is agent-agnostic by wiring one concrete external adapter against the already-shipped CLI/MCP continuity contract. +`P9-S33` shipped the public-safe `alice-core` boundary and startup path. `P9-S34` shipped the deterministic local CLI contract. `P9-S35` shipped the narrow MCP transport. `P9-S36` shipped the first concrete external adapter via OpenClaw. The next non-redundant seam is broadening importer coverage and generating reproducible evidence that imported memory improves recall, resumption, and correction-aware continuity quality. ## Planning Anchors @@ -21,24 +21,26 @@ feature - `docs/adr/ADR-001-public-core-package-boundary.md` - `docs/adr/ADR-002-public-runtime-baseline.md` - `docs/adr/ADR-003-mcp-tool-surface-contract.md` -- `docs/adr/ADR-004-openclaw-integration-boundary.md` if introduced +- `docs/adr/ADR-004-openclaw-integration-boundary.md` +- `docs/adr/ADR-005-import-provenance-and-dedupe-strategy.md` if introduced +- `docs/adr/ADR-007-public-evaluation-harness-scope.md` if introduced ## Sprint Objective -Ship the first OpenClaw adapter path so a sample or real OpenClaw workspace can be imported into Alice, queried through Alice recall/resumption, and optionally consumed through the shipped MCP wedge without changing Alice continuity semantics. +Ship broader importer coverage plus a reproducible local evaluation harness so Alice can ingest at least three production-usable sources in total and generate baseline evidence for recall precision, resumption usefulness, correction effectiveness, and duplicate-memory posture. ## Git Instructions -- Branch Name: `codex/phase9-sprint-36-openclaw-adapter` +- Branch Name: `codex/phase9-sprint-37-importers-eval-harness` - Base Branch: `main` - PR Strategy: one sprint branch, one PR - Merge Policy: squash merge only after reviewer `PASS` and explicit Control Tower merge approval ## Why This Sprint Matters -- It is the first proof that Alice works as an interoperable memory layer, not just a standalone local tool. -- It validates the Phase 9 thesis using one real external agent stack instead of abstract compatibility claims. -- It sets the adapter/import boundary ahead of broader importer work in `P9-S37`. +- It moves Alice from a single-adapter proof to a credible public import story. +- It makes quality claims reproducible instead of anecdotal. +- It sets the evidence baseline that `P9-S38` launch docs and release claims must reflect. ## Redundancy Guard @@ -51,100 +53,125 @@ Ship the first OpenClaw adapter path so a sample or real OpenClaw workspace can - `P9-S33` public-safe packaging, startup path, and sample-data baseline. - `P9-S34` deterministic local CLI contract for continuity workflows. - `P9-S35` deterministic MCP transport for the shipped continuity contract. 
-- Required now (`P9-S36`): - - OpenClaw adapter/import boundary - - file-based import path for OpenClaw workspace or durable memory data - - imported provenance tagging and dedupe stance - - recall/resumption proof on imported OpenClaw material - - optional MCP augmentation proof using imported data through the shipped tool surface -- Explicitly out of `P9-S36`: - - broad importer set beyond the OpenClaw adapter path + - `P9-S36` OpenClaw adapter/import boundary with deterministic provenance and dedupe posture. +- Required now (`P9-S37`): + - broader importer coverage beyond OpenClaw + - at least three production-usable importers in total across shipped Phase 9 surfaces + - reproducible benchmark/evaluation harness + - baseline report generation from local fixtures and documented commands +- Explicitly out of `P9-S37`: + - launch narrative polish or release assets + - public release tagging and distribution work - widening the MCP tool surface - hosted deployment or remote auth work - - launch assets / public release polish - - reopening CLI or MCP semantics unless adapter integration exposes a real parity defect + - reopening OpenClaw adapter semantics except for truly shared importer fixes ## Design Truth -- OpenClaw integration should prove Alice can augment an external agent stack without becoming a generic platform wrapper. -- The adapter should map external state into shipped Alice continuity objects with explicit provenance, not bypass Alice’s trust and correction model. -- Imported material should remain queryable through the same recall/resumption semantics as native Alice data. -- The adapter boundary should stay narrow enough that later importer work can generalize from it. +- Importers should map outside data into the same shipped Alice continuity model, not create source-specific behavior islands. +- Provenance and dedupe posture must stay explicit across every importer, not only OpenClaw. +- Evaluation should measure useful continuity outcomes, not generic benchmark theatre. +- `P9-S37` should leave `P9-S38` with evidence to publish, not more product ambiguity. 
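As an illustration of this shared discipline, the sketch below shows the shape of the provenance/dedupe seam the packet asks for. The simplified types are illustrative stand-ins; the shipped equivalents are `ImporterNormalizedItem` and `dedupe_key_for_payload` in `importer_models.py`.

```python
from dataclasses import dataclass
from hashlib import sha256
import json


@dataclass(frozen=True)
class NormalizedItem:
    """Simplified stand-in for the shipped ImporterNormalizedItem."""

    source_item_id: str
    object_type: str
    title: str
    body: dict
    source_provenance: dict  # must carry an explicit source_kind


def dedupe_key_for_payload(payload: dict) -> str:
    # Canonical JSON (sorted keys, fixed separators) keeps the fingerprint
    # deterministic, so replaying the same import noops instead of duplicating.
    canonical = json.dumps(payload, sort_keys=True, separators=(",", ":"))
    return sha256(canonical.encode("utf-8")).hexdigest()


item = NormalizedItem(
    source_item_id="conversation-1:message-2",
    object_type="Decision",
    title="Decision: reuse one persistence seam across importers",
    body={"decision_text": "reuse one persistence seam across importers"},
    source_provenance={"source_kind": "chatgpt_import"},
)
print(dedupe_key_for_payload({"title": item.title, "body": item.body}))
```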
## Exact Surfaces In Scope -- OpenClaw import/adapter module(s) -- file-based input contract for OpenClaw workspace or durable memory export -- import mapping into shipped Alice continuity objects -- provenance tagging and dedupe behavior for imported material -- one documented local demo path for import -> recall/resume -- optional MCP augmentation proof against imported data -- tests and fixtures for the adapter path +- at least two additional importer paths beyond OpenClaw, bringing shipped total importer coverage to at least three +- importer provenance and dedupe policy generalization +- reproducible fixtures for each newly shipped importer +- local evaluation harness and baseline report generation +- docs for importer usage and evaluation commands +- tests covering import success, dedupe posture, and evaluation script/report generation ## Exact Files In Scope - `.ai/active/SPRINT_PACKET.md` - `.ai/handoff/CURRENT_STATE.md` -- `ARCHITECTURE.md` - `README.md` - `ROADMAP.md` -- `RULES.md` +- `ARCHITECTURE.md` if importer/eval architecture notes need syncing +- `RULES.md` if importer/eval discipline needs canonization - `docs/phase9-sprint-33-38-plan.md` -- `pyproject.toml` if adapter packaging entrypoints are introduced -- `apps/api/src/alicebot_api/openclaw_adapter.py` if introduced -- `apps/api/src/alicebot_api/openclaw_models.py` if introduced -- `apps/api/src/alicebot_api/openclaw_import.py` if introduced -- `apps/api/src/alicebot_api/mcp_tools.py` if parity-alignment is required -- `apps/api/src/alicebot_api/continuity_capture.py` if adapter ingestion reuses capture helpers -- `apps/api/src/alicebot_api/continuity_recall.py` if import parity fixes are required -- `apps/api/src/alicebot_api/continuity_resumption.py` if import parity fixes are required +- `docs/adr/ADR-005-import-provenance-and-dedupe-strategy.md` if introduced +- `docs/adr/ADR-007-public-evaluation-harness-scope.md` if introduced +- `apps/api/src/alicebot_api/openclaw_import.py` if shared importer abstractions are factored carefully +- `apps/api/src/alicebot_api/openclaw_adapter.py` if shared importer fixes are required +- `apps/api/src/alicebot_api/importers/` if introduced +- `apps/api/src/alicebot_api/markdown_import.py` if introduced +- `apps/api/src/alicebot_api/chatgpt_import.py` if introduced +- `apps/api/src/alicebot_api/claude_import.py` if introduced +- `apps/api/src/alicebot_api/csv_import.py` if introduced +- `apps/api/src/alicebot_api/importer_models.py` if introduced +- `apps/api/src/alicebot_api/retrieval_evaluation.py` +- `apps/api/src/alicebot_api/continuity_recall.py` if eval parity fixes are required +- `apps/api/src/alicebot_api/continuity_resumption.py` if eval parity fixes are required - `apps/api/src/alicebot_api/store.py` -- `scripts/load_openclaw_sample_data.py` if introduced -- `scripts/load_openclaw_sample_data.sh` if introduced -- `fixtures/openclaw/` if introduced -- `docs/adr/ADR-004-openclaw-integration-boundary.md` if introduced -- `.ai/archive/planning/2026-04-07-phase9-bootstrap/` if bootstrap planning state is archived for traceability -- `docs/archive/planning/2026-04-07-phase9-bootstrap/` if canonical planning docs are archived for traceability -- `tests/unit/test_openclaw_adapter.py` if introduced -- `tests/integration/test_openclaw_import.py` if introduced -- `tests/integration/test_openclaw_mcp_integration.py` if introduced +- `apps/web/components/approval-detail.test.tsx` if required to keep the mandated web suite stable +- 
`apps/web/components/continuity-open-loops-panel.test.tsx` if required to keep the mandated web suite stable +- `apps/web/components/workflow-memory-writeback-form.tsx` if required to keep importer/eval-related submit state stable +- `scripts/load_markdown_sample_data.py` if introduced +- `scripts/load_markdown_sample_data.sh` if introduced +- `scripts/load_chatgpt_sample_data.py` if introduced +- `scripts/load_chatgpt_sample_data.sh` if introduced +- `scripts/load_claude_sample_data.py` if introduced +- `scripts/load_claude_sample_data.sh` if introduced +- `scripts/load_csv_sample_data.py` if introduced +- `scripts/load_csv_sample_data.sh` if introduced +- `scripts/run_phase9_eval.py` if introduced +- `scripts/run_phase9_eval.sh` if introduced +- `fixtures/importers/` if introduced +- `eval/` if introduced for reports/baselines/fixtures +- `tests/__init__.py` if introduced for package import stability +- `tests/unit/__init__.py` if introduced for package import stability +- `tests/integration/__init__.py` if introduced for package import stability +- `tests/unit/test_importers.py` if introduced +- `tests/unit/test_phase9_eval.py` if introduced +- `tests/integration/test_markdown_import.py` if introduced +- `tests/integration/test_chatgpt_import.py` if introduced +- `tests/integration/test_claude_import.py` if introduced +- `tests/integration/test_csv_import.py` if introduced +- `tests/integration/test_phase9_eval.py` if introduced - `BUILD_REPORT.md` - `REVIEW_REPORT.md` ## In Scope -- define the first-class OpenClaw adapter boundary -- import a sample or real OpenClaw workspace / durable memory export into Alice -- preserve source provenance on imported material -- make imported memory visible through Alice recall and resumption -- document exact local import and demo steps -- keep MCP augmentation proof limited to using the already-shipped tool surface on imported data +- ship at least two additional importer paths beyond OpenClaw +- reach at least three production-usable importers total by end of sprint +- preserve explicit provenance and deterministic dedupe posture across importers +- generate a reproducible local evaluation/baseline report +- keep the mandated backend/web verification suites stable when importer/eval changes expose adjacent regressions +- measure at minimum: + - recall precision + - resumption usefulness + - correction effectiveness + - importer success and duplicate-memory posture +- document exact importer and evaluation commands ## Out Of Scope -- generic importer framework for all sources -- ChatGPT/Claude/markdown/CSV importer bundle -- MCP tool-surface expansion -- hosted adapter services -- broad repo packaging changes -- public launch polish and release assets +- launch polish, screenshots, comparison pages, or public release tag work +- broad UI work +- MCP transport expansion +- generic plugin/SDK ecosystem work +- hosted ingestion services ## Required Deliverables -- runnable OpenClaw adapter/import path -- sample or documented real OpenClaw fixture path -- provenance-preserving import mapping -- recall/resumption proof against imported data -- optional MCP proof against imported data if used to validate augmentation mode -- synced docs, reports, and any new ADR boundary needed for the adapter +- at least three production-usable importers total across shipped Phase 9 work +- reproducible fixtures and loader paths for the newly added importers +- explicit importer provenance/dedupe policy if generalized beyond OpenClaw +- local evaluation harness 
command(s) +- sample baseline report generated from repo-local fixtures +- synced docs, reports, and any needed ADRs ## Acceptance Criteria -- a sample or real OpenClaw workspace can be imported through the documented path -- imported material becomes queryable via Alice recall -- imported material contributes useful output to Alice resumption briefs -- imported provenance is explicit enough to distinguish adapter-ingested material from native Alice capture -- if MCP augmentation is exercised, one shipped MCP tool path works successfully against imported data without widening the tool contract +- at least three production-usable importers exist by the end of `P9-S37` +- newly added imported sources become queryable through Alice recall +- newly added imported sources contribute useful output to Alice resumption +- duplicate-memory posture is measurable and deterministic for every shipped importer +- a local evaluation script runs successfully and produces a baseline report from repo fixtures +- correction-aware behavior is represented in the baseline evidence, not just import-success metrics ## Required Commands @@ -160,111 +187,126 @@ curl -sS http://127.0.0.1:8000/healthz pnpm --dir apps/web test ``` -If a dedicated OpenClaw import command or adapter loader is introduced this sprint, it must be run and included in review evidence together with at least one recall and one resumption proof against imported data. +If dedicated importer loaders or evaluation commands are introduced this sprint, they must be run and included in review evidence together with at least one generated baseline report path. ## Required Acceptance Evidence -- exact OpenClaw input fixture or workspace path used during verification -- exact import command/path used during verification -- one successful recall example against imported data -- one successful resumption example against imported data -- note of import provenance and dedupe posture actually observed -- if used, one successful shipped MCP tool call against imported data +- exact importer fixture paths and commands used during verification +- proof that at least three production-usable importers are working in total +- one successful recall example from at least one newly added importer +- one successful resumption example from at least one newly added importer +- one generated evaluation/baseline report path and summary +- measured duplicate-memory posture and correction-aware outcome evidence ## Implementation Constraints -- preserve shipped P5/P6/P7/P8/P9-S33/P9-S34/P9-S35 semantics -- do not bypass Alice continuity objects or correction semantics for imported data -- keep the adapter narrow and specific to OpenClaw in this sprint -- keep provenance explicit and deterministic -- prefer an auditable import path over a “magic sync” abstraction +- preserve shipped P5/P6/P7/P8/P9-S33/P9-S34/P9-S35/P9-S36 semantics +- keep provenance explicit and deterministic for every importer +- do not invent source-specific retrieval semantics +- prefer a narrow shared importer discipline over an over-abstracted framework +- ensure evaluation claims are reproducible from repo-local commands and fixtures ## Control Tower Task Cards -### Task 1: Adapter Boundary and Models +### Task 1: Importer Expansion -Owner: interop/adapter owner +Owner: import/interop owner Write scope: -- `apps/api/src/alicebot_api/openclaw_adapter.py` -- `apps/api/src/alicebot_api/openclaw_models.py` +- `apps/api/src/alicebot_api/importers/` +- `apps/api/src/alicebot_api/markdown_import.py` +- 
`apps/api/src/alicebot_api/chatgpt_import.py` +- `apps/api/src/alicebot_api/claude_import.py` +- `apps/api/src/alicebot_api/csv_import.py` +- `apps/api/src/alicebot_api/importer_models.py` - `apps/api/src/alicebot_api/openclaw_import.py` -- `docs/adr/ADR-004-openclaw-integration-boundary.md` Responsibilities: -- define the OpenClaw import boundary -- define supported file/input shapes for the first adapter pass -- keep provenance and dedupe rules explicit -- avoid drifting into generic importer-framework work +- add at least two additional production-usable importers beyond OpenClaw +- keep provenance and dedupe posture explicit and source-aware +- share only the abstractions that are truly common across importers +- avoid widening into launch-polish or generic platform work -### Task 2: Continuity Mapping and Storage +### Task 2: Continuity and Evaluation Wiring Owner: backend/runtime owner Write scope: - `apps/api/src/alicebot_api/store.py` -- `apps/api/src/alicebot_api/continuity_capture.py` +- `apps/api/src/alicebot_api/retrieval_evaluation.py` - `apps/api/src/alicebot_api/continuity_recall.py` - `apps/api/src/alicebot_api/continuity_resumption.py` -- `apps/api/src/alicebot_api/mcp_tools.py` +- `scripts/run_phase9_eval.py` +- `scripts/run_phase9_eval.sh` Responsibilities: -- map imported OpenClaw material into shipped Alice continuity semantics -- preserve deterministic retrieval/resumption behavior -- expose imported provenance through recall/resumption/MCP where relevant -- fix only true parity gaps exposed by the adapter +- ensure imported data behaves consistently through shipped recall/resumption semantics +- implement the local evaluation harness and baseline report generation +- keep metrics narrow, reproducible, and tied to actual continuity outcomes +- fix only true shared importer/eval defects -### Task 3: Fixtures, Demo Path, and Docs +### Task 3: Fixtures, Docs, and Policy Owner: docs/integration owner Write scope: -- `ARCHITECTURE.md` - `README.md` - `ROADMAP.md` +- `ARCHITECTURE.md` - `RULES.md` - `.ai/handoff/CURRENT_STATE.md` - `docs/phase9-sprint-33-38-plan.md` -- `fixtures/openclaw/` -- `scripts/load_openclaw_sample_data.py` -- `scripts/load_openclaw_sample_data.sh` +- `docs/adr/ADR-005-import-provenance-and-dedupe-strategy.md` +- `docs/adr/ADR-007-public-evaluation-harness-scope.md` +- `fixtures/importers/` +- `eval/` Responsibilities: -- provide one reproducible local OpenClaw demo path -- document exact import steps and expected outcomes -- keep startup/sample-data guidance from `P9-S33` unchanged -- keep architecture/rules/planning docs aligned with the shipped adapter boundary and importer posture -- make the next seam toward broader importers/eval explicit +- document exact importer and evaluation paths +- keep provenance/dedupe rules explicit in canonical docs +- keep Phase 9 sequencing factual and non-redundant +- leave `P9-S38` with evidence-backed documentation inputs rather than open product questions -### Task 4: Verification and Interop Proof +### Task 4: Verification and Evidence Owner: sprint integrator Write scope: -- `tests/unit/test_openclaw_adapter.py` -- `tests/integration/test_openclaw_import.py` -- `tests/integration/test_openclaw_mcp_integration.py` +- `tests/unit/test_importers.py` +- `tests/unit/test_phase9_eval.py` +- `tests/__init__.py` +- `tests/unit/__init__.py` +- `tests/integration/__init__.py` +- `tests/integration/test_markdown_import.py` +- `tests/integration/test_chatgpt_import.py` +- `tests/integration/test_claude_import.py` +- 
`tests/integration/test_csv_import.py` +- `tests/integration/test_phase9_eval.py` +- `apps/web/components/approval-detail.test.tsx` +- `apps/web/components/continuity-open-loops-panel.test.tsx` +- `apps/web/components/workflow-memory-writeback-form.tsx` - `BUILD_REPORT.md` - `REVIEW_REPORT.md` Responsibilities: -- prove import works against the documented fixture/workspace shape -- prove recall/resumption work against imported data -- prove any MCP augmentation path stays within the shipped tool contract -- keep scope hygiene explicit if supporting files are touched +- prove the newly added importers work against documented fixtures +- prove dedupe and provenance behavior are deterministic +- prove the evaluation harness runs and generates a baseline report +- land only the minimum adjacent verification fixes needed to keep required suites green +- keep scope hygiene explicit if support files are touched ## Definition Of Done -- `P9-S36` OpenClaw adapter/import path exists and is runnable from the documented local install -- imported OpenClaw data is queryable through shipped Alice recall/resumption semantics -- provenance and dedupe posture are explicit and reviewable +- `P9-S37` ships at least three production-usable importers in total +- local evaluation harness exists and generates a reproducible baseline report +- imported data from newly added sources behaves correctly through shipped Alice recall/resumption semantics - docs, tests, build report, and review report are aligned -- no broad importer-bundle or launch-polish work leaks into the sprint +- no launch-polish or release-tag work leaks into the sprint diff --git a/.ai/handoff/CURRENT_STATE.md b/.ai/handoff/CURRENT_STATE.md index 3104fe1..faf4d2b 100644 --- a/.ai/handoff/CURRENT_STATE.md +++ b/.ai/handoff/CURRENT_STATE.md @@ -20,8 +20,8 @@ ## Incomplete / At-Risk Areas -- importer coverage is still limited to one shipped adapter path (OpenClaw) -- broader importer story is not yet public-ready +- importer coverage is now broader but still file-import scoped (OpenClaw + Markdown + ChatGPT) +- evaluation harness is local and fixture-backed; hosted/remote benchmarking is still intentionally out of scope - OSS license finalization is still open ## Current Milestone @@ -30,7 +30,7 @@ Phase 9: Alice Public Core and Agent Interop ## Latest State Summary -`P9-S33`, `P9-S34`, `P9-S35`, and `P9-S36` are now shipped baselines: +`P9-S33`, `P9-S34`, `P9-S35`, `P9-S36`, and `P9-S37` are now shipped baselines: - package boundary is documented around `alice-core` - canonical local startup path is documented and script-backed @@ -64,12 +64,17 @@ Phase 9: Alice Public Core and Agent Interop - import -> recall/resumption behavior on imported scope - shipped MCP `alice_recall`/`alice_resume` usage over imported data without MCP surface expansion - ADR-004 defines the accepted OpenClaw integration boundary and scope constraints. 
- -The next active seam is `P9-S37`: - -- expand from single-adapter proof to broader importer coverage and evaluation harness work -- keep parity strict with existing deterministic continuity semantics -- avoid widening MCP transport semantics unless parity defects are found +- Additional importer paths now exist and are shipped: + - markdown importer: `markdown_import.py` plus `./scripts/load_markdown_sample_data.sh` + - ChatGPT importer: `chatgpt_import.py` plus `./scripts/load_chatgpt_sample_data.sh` +- Shared importer provenance/dedupe persistence now uses one deterministic policy seam: + - `apps/api/src/alicebot_api/importers/common.py` + - importer-typed provenance fields with source-specific dedupe keys +- Local Phase 9 evaluation harness is now shipped: + - command: `./scripts/run_phase9_eval.sh` + - generated report path: `eval/reports/phase9_eval_latest.json` + - committed baseline report path: `eval/baselines/phase9_s37_baseline.json` +- ADR-005 and ADR-007 now define the accepted importer provenance/dedupe and evaluation-harness boundaries. ## Critical Constraints @@ -80,11 +85,11 @@ The next active seam is `P9-S37`: ## Immediate Next Move -Execute `P9-S37` on top of the shipped `P9-S36` boundary: +Execute `P9-S38` on top of the shipped `P9-S37` boundary: -- broaden importer coverage beyond OpenClaw using the same explicit provenance posture -- add benchmark/evaluation harness evidence for import quality and correction-aware continuity outcomes -- preserve startup/sample-data path and avoid MCP contract expansion unless needed for parity fixes +- convert shipped `P9-S37` evidence and commands into launch-quality public docs +- keep claims anchored to reproducible local importer/evaluation evidence +- preserve startup/sample-data/runtime determinism and avoid MCP contract expansion unless parity defects are found ## Legacy Compatibility Markers diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index fa9edf0..01e5f52 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -13,12 +13,13 @@ The current implementation already includes: Phase 9 does not replace that architecture. It packages and exposes it through public-safe boundaries. 
-Current public packaging baseline now spans `P9-S33` through `P9-S36`: +Current public packaging baseline now spans `P9-S33` through `P9-S37`: - `P9-S33`: public-safe core boundary and canonical local startup - `P9-S34`: deterministic local CLI continuity contract - `P9-S35`: narrow deterministic MCP transport contract - `P9-S36`: first OpenClaw adapter/import boundary with provenance + dedupe posture +- `P9-S37`: additional markdown and ChatGPT importer paths plus reproducible local evaluation harness ## Technical Stack @@ -28,14 +29,15 @@ Current public packaging baseline now spans `P9-S33` through `P9-S36`: - Vector support: `pgvector` - Local infrastructure: Docker Compose, Redis, MinIO - Testing: pytest, Vitest -- Shipped packaging/runtime baseline (`P9-S33` to `P9-S36`): +- Shipped packaging/runtime baseline (`P9-S33` to `P9-S37`): - `alice-core` (published package name in `pyproject.toml`) - deterministic fixture loader (`scripts/load_sample_data.sh`) - deterministic local CLI contract (`python -m alicebot_api ...`) - deterministic local MCP transport (`python -m alicebot_api.mcp_server`) - OpenClaw import loader and fixture path (`scripts/load_openclaw_sample_data.sh`) -- Deferred/next packaging targets (`P9-S37+`): - - broader importer set and evaluation harness outputs + - markdown import loader and fixture path (`scripts/load_markdown_sample_data.sh`) + - ChatGPT import loader and fixture path (`scripts/load_chatgpt_sample_data.sh`) + - Phase 9 evaluation harness (`scripts/run_phase9_eval.sh`) ## High-Level Architecture @@ -60,10 +62,12 @@ Current public packaging baseline now spans `P9-S33` through `P9-S36`: - terminal access to public core flows 3. `alice-mcp-server` (shipped baseline in `P9-S35`, narrow tool contract) - stable MCP tool surface for external assistants -4. `alice-importers` (next seam in `P9-S37`) - - markdown, chat export, CSV, and adapter-backed imports -5. `alice-openclaw` (shipped baseline in `P9-S36`, adapter-scoped import path) - - OpenClaw-specific ingestion and interop mapping +4. `alice-importers` (shipped baseline in `P9-S37`) + - markdown import path + - ChatGPT export import path + - shared provenance + dedupe persistence strategy +5. `alice-openclaw` (shipped baseline in `P9-S36`, integrated with shared importer persistence in `P9-S37`) + - OpenClaw-specific normalization with shared deterministic import persistence ## Module Boundaries diff --git a/BUILD_REPORT.md b/BUILD_REPORT.md index 22b45e2..6634dd2 100644 --- a/BUILD_REPORT.md +++ b/BUILD_REPORT.md @@ -1,73 +1,90 @@ # BUILD_REPORT.md ## sprint objective -Ship `P9-S36` by adding the first OpenClaw adapter/import path so OpenClaw workspace or durable-memory data can be imported into Alice continuity objects with explicit provenance and deterministic dedupe, then queried through shipped recall/resumption semantics and optionally through the shipped MCP tool surface without widening MCP contracts. +Ship `P9-S37` by expanding importer coverage beyond OpenClaw to at least three production-usable importers total, generalizing deterministic provenance/dedupe posture across importers, and shipping a reproducible local evaluation harness with baseline report evidence for recall precision, resumption usefulness, correction effectiveness, importer success, and duplicate-memory posture. 
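For orientation, a sketch of the baseline report shape the harness writes is shown below. Only the top-level `status` value and the `correction_effectiveness_rate` field are confirmed elsewhere in this report; the remaining metric keys are assumptions named after the measured dimensions, and the authoritative writer is `write_phase9_evaluation_report` in `retrieval_evaluation.py`.

```python
import json
from pathlib import Path

# Hypothetical report shape; exact keys come from write_phase9_evaluation_report.
report = {
    "status": "pass",
    "metrics": {
        "recall_precision_rate": 1.0,
        "resumption_usefulness_rate": 1.0,
        "correction_effectiveness_rate": 1.0,  # field name confirmed in review evidence
        "importer_success_rate": 1.0,
        "duplicate_noop_rate": 1.0,
    },
}

out = Path("eval/reports/phase9_eval_latest.json")
out.parent.mkdir(parents=True, exist_ok=True)
out.write_text(json.dumps(report, indent=2), encoding="utf-8")
```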
## completed work -- Implemented OpenClaw adapter boundary and input normalization: - - `apps/api/src/alicebot_api/openclaw_models.py` - - `apps/api/src/alicebot_api/openclaw_adapter.py` - - Supported source contract: - - JSON file with `durable_memory` / `memories` / `items` / `records` - - workspace directory with known JSON files (`workspace.json`, `openclaw_workspace.json`, `durable_memory.json`, `memories.json`, `openclaw_memories.json`) -- Implemented OpenClaw import-to-continuity mapping: - - `apps/api/src/alicebot_api/openclaw_import.py` - - deterministic mapping into shipped continuity object types (`Decision`, `NextAction`, `WaitingFor`, `Commitment`, etc.) - - explicit provenance tagging on imported material (`source_kind=openclaw_import`, workspace/source metadata) - - deterministic dedupe posture via stable workspace+payload fingerprint (`openclaw_dedupe_key`) -- Hardened importer lifecycle-status handling: - - unknown external `status` values are rejected with explicit validation errors - - importer no longer silently coerces unknown statuses to `active` -- Added reproducible fixture and local import path: - - `fixtures/openclaw/workspace_v1.json` - - `scripts/load_openclaw_sample_data.py` - - `scripts/load_openclaw_sample_data.sh` -- Added verification coverage for adapter/import/interop: - - `tests/unit/test_openclaw_adapter.py` - - `tests/integration/test_openclaw_import.py` - - `tests/integration/test_openclaw_mcp_integration.py` -- Added adapter boundary ADR: - - `docs/adr/ADR-004-openclaw-integration-boundary.md` -- Synced sprint-scoped docs: - - `README.md` - - `ROADMAP.md` - - `.ai/handoff/CURRENT_STATE.md` - - `ARCHITECTURE.md` - - `RULES.md` - - `.ai/active/SPRINT_PACKET.md` (scope hygiene annotation for archived planning artifacts) +- Added two new production-usable importer paths: + - `markdown_import` (`apps/api/src/alicebot_api/markdown_import.py`) + - `chatgpt_import` (`apps/api/src/alicebot_api/chatgpt_import.py`) +- Generalized importer provenance/dedupe persistence: + - shared importer model + normalization helpers in `apps/api/src/alicebot_api/importer_models.py` + - shared importer persistence seam in `apps/api/src/alicebot_api/importers/common.py` + - `openclaw_import.py` refactored to use shared importer persistence logic +- Added reproducible importer fixtures: + - `fixtures/importers/markdown/workspace_v1.md` + - `fixtures/importers/chatgpt/workspace_v1.json` +- Added importer loader commands: + - `scripts/load_markdown_sample_data.py` + `scripts/load_markdown_sample_data.sh` + - `scripts/load_chatgpt_sample_data.py` + `scripts/load_chatgpt_sample_data.sh` +- Added local evaluation harness and report writer: + - `scripts/run_phase9_eval.py` + - `scripts/run_phase9_eval.sh` + - `apps/api/src/alicebot_api/retrieval_evaluation.py` extended with: + - `run_phase9_evaluation` + - `write_phase9_evaluation_report` + - importer/eval metrics for recall, resumption, correction, and dedupe posture +- Generated and committed baseline evidence: + - `eval/reports/phase9_eval_latest.json` + - `eval/baselines/phase9_s37_baseline.json` +- Added tests for importer/eval behavior: + - `tests/unit/test_importers.py` + - `tests/unit/test_phase9_eval.py` + - `tests/integration/test_markdown_import.py` + - `tests/integration/test_chatgpt_import.py` + - `tests/integration/test_phase9_eval.py` + - plus shared-package test import stabilization via `tests/__init__.py`, `tests/unit/__init__.py`, `tests/integration/__init__.py` +- Synced sprint-scoped docs and ADRs: + - `README.md`, 
`ROADMAP.md`, `ARCHITECTURE.md`, `RULES.md`, `.ai/handoff/CURRENT_STATE.md`, `docs/phase9-sprint-33-38-plan.md` + - `docs/adr/ADR-005-import-provenance-and-dedupe-strategy.md` + - `docs/adr/ADR-007-public-evaluation-harness-scope.md` ## incomplete work -- None inside `P9-S36` scope. +- None inside `P9-S37` scope. - Intentionally deferred (out of scope): - - generic multi-source importer framework + - Claude importer path + - CSV importer path - MCP tool-surface expansion - - hosted adapter/auth/service work + - hosted/remote ingestion or evaluation infrastructure ## files changed -- `apps/api/src/alicebot_api/openclaw_models.py` -- `apps/api/src/alicebot_api/openclaw_adapter.py` +- `.ai/active/SPRINT_PACKET.md` +- `apps/api/src/alicebot_api/importer_models.py` +- `apps/api/src/alicebot_api/importers/__init__.py` +- `apps/api/src/alicebot_api/importers/common.py` - `apps/api/src/alicebot_api/openclaw_import.py` -- `scripts/load_openclaw_sample_data.py` -- `scripts/load_openclaw_sample_data.sh` -- `fixtures/openclaw/workspace_v1.json` -- `tests/unit/test_openclaw_adapter.py` -- `tests/integration/test_openclaw_import.py` -- `tests/integration/test_openclaw_mcp_integration.py` -- `docs/adr/ADR-004-openclaw-integration-boundary.md` -- `.ai/archive/planning/2026-04-07-phase9-bootstrap/SPRINT_PACKET.md` -- `.ai/archive/planning/2026-04-07-phase9-bootstrap/CURRENT_STATE.md` -- `docs/archive/planning/2026-04-07-phase9-bootstrap/README.md` -- `docs/archive/planning/2026-04-07-phase9-bootstrap/ROADMAP.md` -- `docs/archive/planning/2026-04-07-phase9-bootstrap/PRODUCT_BRIEF.md` -- `docs/archive/planning/2026-04-07-phase9-bootstrap/ARCHITECTURE.md` -- `docs/archive/planning/2026-04-07-phase9-bootstrap/RULES.md` -- `ARCHITECTURE.md` -- `RULES.md` +- `apps/api/src/alicebot_api/markdown_import.py` +- `apps/api/src/alicebot_api/chatgpt_import.py` +- `apps/api/src/alicebot_api/retrieval_evaluation.py` +- `scripts/load_markdown_sample_data.py` +- `scripts/load_markdown_sample_data.sh` +- `scripts/load_chatgpt_sample_data.py` +- `scripts/load_chatgpt_sample_data.sh` +- `scripts/run_phase9_eval.py` +- `scripts/run_phase9_eval.sh` +- `fixtures/importers/markdown/workspace_v1.md` +- `fixtures/importers/chatgpt/workspace_v1.json` +- `eval/reports/phase9_eval_latest.json` +- `eval/baselines/phase9_s37_baseline.json` +- `tests/unit/test_importers.py` +- `tests/unit/test_phase9_eval.py` +- `tests/integration/test_markdown_import.py` +- `tests/integration/test_chatgpt_import.py` +- `tests/integration/test_phase9_eval.py` +- `tests/__init__.py` +- `tests/unit/__init__.py` +- `tests/integration/__init__.py` +- `apps/web/components/approval-detail.test.tsx` +- `apps/web/components/continuity-open-loops-panel.test.tsx` +- `apps/web/components/workflow-memory-writeback-form.tsx` - `README.md` - `ROADMAP.md` -- `docs/phase9-sprint-33-38-plan.md` +- `ARCHITECTURE.md` +- `RULES.md` - `.ai/handoff/CURRENT_STATE.md` -- `.ai/active/SPRINT_PACKET.md` +- `docs/phase9-sprint-33-38-plan.md` +- `docs/adr/ADR-005-import-provenance-and-dedupe-strategy.md` +- `docs/adr/ADR-007-public-evaluation-harness-scope.md` - `BUILD_REPORT.md` - `REVIEW_REPORT.md` @@ -78,32 +95,35 @@ Ship `P9-S36` by adding the first OpenClaw adapter/import path so OpenClaw works - PASS - `./scripts/load_sample_data.sh` - PASS (`status=noop`, already loaded) -- `./scripts/load_openclaw_sample_data.sh --source fixtures/openclaw/workspace_v1.json` - - PASS (`status=ok`, `imported_count=4`, `skipped_duplicates=1`) -- `./scripts/load_openclaw_sample_data.sh --source 
fixtures/openclaw/workspace_v1.json` - - PASS (`status=noop`, `imported_count=0`, `skipped_duplicates=5`) +- `./scripts/load_openclaw_sample_data.sh --user-id 00000000-0000-0000-0000-000000000038 --user-email openclaw-import-038@example.com --display-name "OpenClaw Import 038" --source fixtures/openclaw/workspace_v1.json` + - PASS (`imported_count=4`, `skipped_duplicates=1`) +- `./scripts/load_markdown_sample_data.sh --user-id 00000000-0000-0000-0000-000000000038 --source fixtures/importers/markdown/workspace_v1.md` + - PASS (`imported_count=4`, `skipped_duplicates=1`) +- `./scripts/load_chatgpt_sample_data.sh --user-id 00000000-0000-0000-0000-000000000038 --source fixtures/importers/chatgpt/workspace_v1.json` + - PASS (`imported_count=4`, `skipped_duplicates=1`) - `APP_RELOAD=false ./scripts/api_dev.sh` - - PASS (started on `http://127.0.0.1:8000`) + - PASS (startup observed; server on `127.0.0.1:8000`) - `curl -sS http://127.0.0.1:8000/healthz` - - PASS (`status":"ok"`) -- `./.venv/bin/python -m alicebot_api recall --thread-id cccccccc-cccc-4ccc-8ccc-cccccccccccc --project "Alice Public Core" --query "MCP tool surface" --limit 5` - - PASS (returned imported OpenClaw `Decision` with `source_kind=openclaw_import` provenance references) -- `./.venv/bin/python -m alicebot_api resume --thread-id cccccccc-cccc-4ccc-8ccc-cccccccccccc --max-recent-changes 5 --max-open-loops 5` - - PASS (`last_decision`, `next_action`, and `recent_changes` include imported OpenClaw data) -- `./.venv/bin/python -m pytest tests/unit/test_openclaw_adapter.py -q` - - PASS (`5 passed`) -- `./.venv/bin/python -m pytest tests/integration/test_openclaw_import.py tests/integration/test_openclaw_mcp_integration.py -q` - - PASS (`2 passed`) -- `./.venv/bin/python -m pytest tests/unit/test_openclaw_adapter.py tests/integration/test_openclaw_import.py tests/integration/test_openclaw_mcp_integration.py -q` + - PASS (`{"status":"ok", ...}`) +- `./.venv/bin/python -m alicebot_api --user-id 00000000-0000-0000-0000-000000000038 recall --thread-id eeeeeeee-eeee-4eee-8eee-eeeeeeeeeeee --project "Markdown Import Project" --query "markdown importer deterministic" --limit 5` + - PASS (returned markdown-imported records with explicit provenance) +- `./.venv/bin/python -m alicebot_api --user-id 00000000-0000-0000-0000-000000000038 resume --thread-id eeeeeeee-eeee-4eee-8eee-eeeeeeeeeeee --project "Markdown Import Project" --max-recent-changes 5 --max-open-loops 5` + - PASS (`last_decision` + `next_action` from `markdown_import`) +- `./scripts/run_phase9_eval.sh --user-id 00000000-0000-0000-0000-000000000039 --user-email phase9-eval-039@example.com --display-name "Phase9 Eval 039" --report-path eval/reports/phase9_eval_latest.json` + - PASS (`status=pass`, all rates `1.0`) +- `./.venv/bin/python -m pytest tests/unit/test_importers.py tests/unit/test_phase9_eval.py -q` - PASS (`7 passed`) +- `./.venv/bin/python -m pytest tests/integration/test_openclaw_import.py tests/integration/test_markdown_import.py tests/integration/test_chatgpt_import.py tests/integration/test_phase9_eval.py -q` + - PASS (`4 passed`) - `./.venv/bin/python -m pytest tests/unit tests/integration` - - PASS (`968 passed in 90.94s`) + - PASS (`978 passed`) - `pnpm --dir apps/web test` - PASS (`57 files, 192 tests`) ## blockers/issues -- Sandbox restrictions required elevated execution for localhost Postgres/API verification commands. -- No remaining functional blockers in sprint scope. 
+- Localhost database/network checks from sandboxed runs required escalated command execution for several of the required verification commands. +- One transient verification failure was encountered and resolved: + - `load_openclaw_sample_data.sh` with a new user ID initially failed due to the global unique email constraint (`users_email_key`) when the default import email was reused; a rerun with a unique `--user-email` succeeded. ## recommended next step -Start `P9-S37` by generalizing importer coverage from the now-shipped OpenClaw boundary while preserving the same explicit provenance and dedupe posture, and adding benchmark/evaluation harness evidence for importer quality. +Execute `P9-S38` by turning the now-shipped importer/evaluation evidence and commands into launch-quality external docs without widening MCP transport or importer semantics. diff --git a/README.md b/README.md index 1e03c4e..b2c4ece 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ Alice is a local-first memory and continuity engine for AI agents. -`P9-S33` shipped the public-core baseline. `P9-S34` shipped the deterministic local CLI for continuity flows on top of that baseline. `P9-S35` shipped a narrow MCP transport for the same continuity contract. `P9-S36` ships the first OpenClaw adapter/import path on top of those shipped surfaces. +`P9-S33` shipped the public-core baseline. `P9-S34` shipped the deterministic local CLI for continuity flows on top of that baseline. `P9-S35` shipped a narrow MCP transport for the same continuity contract. `P9-S36` shipped the first OpenClaw adapter/import path on top of those shipped surfaces. `P9-S37` is now shipped with broader importer coverage and a reproducible local evaluation harness. ## Canonical Local Startup Path (`P9-S33`) @@ -112,6 +112,32 @@ Sample proof commands against imported scope: Dedupe posture is deterministic: re-running the same import returns `status=noop` with `skipped_duplicates=5`.
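The loaders documented in the next section are thin wrappers over Python entrypoints, so imports can also be driven programmatically. A minimal sketch using the shipped `import_chatgpt_source` entrypoint follows; the `ContinuityStore()` construction is a placeholder, since the store's exact constructor arguments are environment-specific and not shown here.

```python
from uuid import UUID

from alicebot_api.chatgpt_import import import_chatgpt_source
from alicebot_api.store import ContinuityStore

store = ContinuityStore()  # placeholder: real construction is environment-specific

summary = import_chatgpt_source(
    store,
    user_id=UUID("00000000-0000-0000-0000-000000000038"),
    source="fixtures/importers/chatgpt/workspace_v1.json",
)
# Replaying the same source should report status="noop" and only increment
# skipped_duplicates, matching the deterministic dedupe posture above.
print(summary)
```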
+## Importers and Eval Harness (`P9-S37`) + +`P9-S37` delivers three production-usable importers in total: + +- OpenClaw (`openclaw_import`) +- Markdown (`markdown_import`) +- ChatGPT export (`chatgpt_import`) + +Run the deterministic importer loaders: + +```bash +./scripts/load_openclaw_sample_data.sh --source fixtures/openclaw/workspace_v1.json +./scripts/load_markdown_sample_data.sh --source fixtures/importers/markdown/workspace_v1.md +./scripts/load_chatgpt_sample_data.sh --source fixtures/importers/chatgpt/workspace_v1.json +``` + +Run the local Phase 9 evaluation harness and write a report: + +```bash +./scripts/run_phase9_eval.sh --user-id 00000000-0000-0000-0000-000000000037 --report-path eval/reports/phase9_eval_latest.json +``` + +Committed baseline sample report path: + +- `eval/baselines/phase9_s37_baseline.json` + ### Compatible Client Example (Claude Desktop MCP) `claude_desktop_config.json` example: @@ -144,6 +170,8 @@ Dedupe posture is deterministic: re-running the same import returns `status=noop - `apps/web`: operator shell - `fixtures/public_sample_data`: deterministic public-core sample dataset - `fixtures/openclaw`: deterministic OpenClaw adapter fixture dataset +- `fixtures/importers`: deterministic markdown/chatgpt importer fixtures +- `eval`: Phase 9 evaluation reports and baselines - `scripts`: startup, migration, and sample-data load scripts - `docs`: product, architecture, ADRs, and Phase 9 planning docs @@ -160,6 +188,8 @@ Dedupe posture is deterministic: re-running the same import returns `status=noop - [docs/phase9-public-core-boundary.md](docs/phase9-public-core-boundary.md) - [docs/phase9-bootstrap-notes.md](docs/phase9-bootstrap-notes.md) - [docs/adr/ADR-004-openclaw-integration-boundary.md](docs/adr/ADR-004-openclaw-integration-boundary.md) +- [docs/adr/ADR-005-import-provenance-and-dedupe-strategy.md](docs/adr/ADR-005-import-provenance-and-dedupe-strategy.md) +- [docs/adr/ADR-007-public-evaluation-harness-scope.md](docs/adr/ADR-007-public-evaluation-harness-scope.md) ## Legacy Compatibility Marker diff --git a/REVIEW_REPORT.md b/REVIEW_REPORT.md index 3910c71..9a6f9c1 100644 --- a/REVIEW_REPORT.md +++ b/REVIEW_REPORT.md @@ -4,41 +4,37 @@ PASS ## criteria met -- OpenClaw adapter/import boundary is implemented and runnable with fixture + loader scripts. -- Imported material is queryable through shipped recall semantics and contributes to shipped resumption output. -- Imported provenance remains explicit (`source_kind=openclaw_import`, `openclaw_*` metadata fields). -- Dedupe posture remains deterministic and idempotent (initial import + noop re-import behavior preserved). -- MCP augmentation proof remains within shipped tool contract (`alice_recall`, `alice_resume`). -- Status-handling fix landed: unknown external `status` values are now explicitly rejected instead of silently coerced to `active`. 
-- Scope/docs hygiene fixes landed: - - sprint packet scope now explicitly allows the archive snapshots that were added for traceability - - build report files-changed list now includes archive paths - - architecture and rules docs were updated to align with shipped `P9-S34/35/36` status and importer status-mapping discipline -- Verification rerun after fixes: - - `./.venv/bin/python -m pytest tests/unit/test_openclaw_adapter.py -q` -> `5 passed` - - `./.venv/bin/python -m pytest tests/integration/test_openclaw_import.py tests/integration/test_openclaw_mcp_integration.py -q` -> `2 passed` - - `./.venv/bin/python -m pytest tests/unit/test_openclaw_adapter.py tests/integration/test_openclaw_import.py tests/integration/test_openclaw_mcp_integration.py -q` -> `7 passed` - - `./.venv/bin/python -m pytest tests/unit tests/integration` -> `968 passed` - - `pnpm --dir apps/web test` -> `57 files, 192 tests` +- At least three production-usable importers are present and working in total (OpenClaw, Markdown, ChatGPT export). +- Newly added imported sources are queryable through recall and contribute useful resumption output. +- Duplicate-memory posture is deterministic and measurable per importer (first import persists, replay import noops with duplicate skips). +- A local evaluation harness exists, runs via a canonical command path, and produces baseline evidence from repo fixtures. +- Correction-aware behavior is represented in baseline evidence (`correction_effectiveness_rate`). +- Sprint docs and ADRs are synchronized with the shipped importer/evaluation behavior. ## criteria missed - None. ## quality issues -- No blocking quality issues found in sprint scope after fixes. +- Resolved during review-fix cycle: + - Added `scripts/run_phase9_eval.sh` and updated docs to use it as the canonical reproducible command path. + - Fixed async timing issues in web tests to remove full-suite instability: + - `apps/web/components/approval-detail.test.tsx` + - `apps/web/components/continuity-open-loops-panel.test.tsx` + - Adjusted status-reset behavior in `apps/web/components/workflow-memory-writeback-form.tsx` so successful submit feedback is not immediately overwritten. ## regression risks - Low. -- Residual risk is primarily future importer expansion drift; current adapter path is protected by targeted and full-suite passing tests. +- Main ongoing risk is future importer additions bypassing shared persistence/dedupe discipline in `apps/api/src/alicebot_api/importers/common.py`; current tests cover the three shipped importers and evaluation harness behavior. ## docs issues -- No blocking docs issues remain for this sprint. +- None blocking. +- Canonical eval command references now consistently use `./scripts/run_phase9_eval.sh`. ## should anything be added to RULES.md? -- Already addressed in this pass: importer rule added requiring unknown external lifecycle/status values to be explicitly mapped or rejected. +- No additional rule is required beyond the now-updated reproducibility requirement. ## should anything update ARCHITECTURE.md? -- Already addressed in this pass: Phase 9 packaging-state language now reflects shipped `P9-S34` CLI, `P9-S35` MCP transport, and `P9-S36` OpenClaw adapter baseline. +- No further update required; architecture docs already reflect the shipped importer/eval baseline and command path. ## recommended next action -1. Proceed to `P9-S37` importer expansion, preserving the same provenance/dedupe discipline and explicit status-mapping posture. +1. 
Proceed to `P9-S38` launch/documentation work using `eval/baselines/phase9_s37_baseline.json` and the shipped loader/eval commands as canonical evidence. diff --git a/ROADMAP.md b/ROADMAP.md index 519d3f4..4be3936 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -49,12 +49,17 @@ Success condition: - import path for OpenClaw durable memory/workspace data - Alice MCP augmentation mode for OpenClaw-style workflows -### P9-S37: Importers and Evaluation Harness (current seam) +### P9-S37: Importers and Evaluation Harness (shipped baseline) -- at least three production-usable importers -- local benchmark and baseline report generation +- three production-usable importers are now shipped: + - OpenClaw + - Markdown + - ChatGPT export +- deterministic importer provenance + dedupe policy is generalized across importers +- local evaluation harness command is shipped (`./scripts/run_phase9_eval.sh`) +- baseline evidence report is now generated and checked in (`eval/baselines/phase9_s37_baseline.json`) -### P9-S38: Docs, Launch Assets, and Public Release +### P9-S38: Docs, Launch Assets, and Public Release (current delivery seam) - public quickstart - integration docs @@ -82,6 +87,7 @@ Success condition: - `P9-S33` delivered the public-safe `alice-core` boundary, canonical local startup path, and deterministic sample-data proof. - `P9-S34` delivered the shipped local CLI continuity contract that `P9-S35` should mirror through MCP. - `P9-S35` delivered the shipped local MCP contract that `P9-S36` should consume without widening. +- `P9-S36` delivered the shipped OpenClaw adapter baseline that `P9-S37` should generalize without reopening transport semantics. ## Legacy Compatibility Markers diff --git a/RULES.md b/RULES.md index 2b0f29a..4455746 100644 --- a/RULES.md +++ b/RULES.md @@ -26,6 +26,7 @@ - Preserve append-only continuity, correction, and revision history. - Keep imported data provenance explicit. +- Every shipped importer must persist a source-specific deterministic dedupe key in provenance (for example `_dedupe_key`) and keep `source_kind` explicit. - Importers must explicitly map or reject unknown external lifecycle/status values; do not silently coerce them to `active`. - Default memory admission to conservative behavior; do not loosen admission discipline for launch convenience. - Do not silently overwrite stale or superseded truth. @@ -44,6 +45,7 @@ - CLI commands need deterministic golden-output tests. - MCP tools need stable contract tests. - Importers need fixture-backed success, dedupe, and failure-path tests. +- Phase 9 importer/evaluation claims must be reproducible from `./scripts/run_phase9_eval.sh` and repo-local fixtures. - Do not make public memory-quality or recall-quality claims without evaluation evidence. 
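A minimal sketch of the fixture-backed dedupe test this discipline requires is shown below, assuming a pytest `store` fixture and the summary fields (`status`, `imported_count`, `skipped_duplicates`) reported by the shipped loaders.

```python
from uuid import uuid4

from alicebot_api.chatgpt_import import import_chatgpt_source

FIXTURE = "fixtures/importers/chatgpt/workspace_v1.json"


def test_chatgpt_import_replay_is_noop(store):  # `store` fixture is assumed
    user_id = uuid4()
    first = import_chatgpt_source(store, user_id=user_id, source=FIXTURE)
    replay = import_chatgpt_source(store, user_id=user_id, source=FIXTURE)

    # First pass persists items; deterministic dedupe keys make replay a noop.
    assert first["imported_count"] > 0
    assert replay["status"] == "noop"
    assert replay["imported_count"] == 0
    assert replay["skipped_duplicates"] >= first["imported_count"]
```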
## Legacy Compatibility Marker diff --git a/apps/api/src/alicebot_api/chatgpt_import.py b/apps/api/src/alicebot_api/chatgpt_import.py new file mode 100644 index 0000000..42664df --- /dev/null +++ b/apps/api/src/alicebot_api/chatgpt_import.py @@ -0,0 +1,401 @@ +from __future__ import annotations + +import json +from pathlib import Path +from uuid import UUID + +from alicebot_api.importer_models import ( + ImporterNormalizedBatch, + ImporterNormalizedItem, + ImporterValidationError, + ImporterWorkspaceContext, + OBJECT_TYPE_TO_BODY_KEY, + OBJECT_TYPE_TO_PREFIX, + as_json_object, + dedupe_key_for_payload, + merge_json_objects, + normalize_object_type, + normalize_optional_text, + parse_optional_confidence, + parse_optional_status, +) +from alicebot_api.importers.common import ImportPersistenceConfig, import_normalized_batch +from alicebot_api.store import ContinuityStore, JsonObject + + +_DEFAULT_CONFIDENCE = 0.83 +_DEFAULT_DEDUPE_POSTURE = "workspace_conversation_message_fingerprint" +_PREFIX_TO_OBJECT_TYPE: tuple[tuple[str, str], ...] = ( + ("decision:", "Decision"), + ("next action:", "NextAction"), + ("next:", "NextAction"), + ("task:", "NextAction"), + ("commitment:", "Commitment"), + ("waiting for:", "WaitingFor"), + ("blocker:", "Blocker"), + ("fact:", "MemoryFact"), + ("remember:", "MemoryFact"), + ("note:", "Note"), +) + + +class ChatGPTImportValidationError(ImporterValidationError): + """Raised when a ChatGPT import payload is invalid.""" + + +def _truncate(value: str, *, max_length: int) -> str: + if len(value) <= max_length: + return value + return value[: max_length - 3].rstrip() + "..." + + +def _build_title(*, object_type: str, text: str, explicit_title: str | None) -> str: + if explicit_title is not None: + return _truncate(explicit_title, max_length=280) + prefix = OBJECT_TYPE_TO_PREFIX[object_type] + return _truncate(f"{prefix}: {text}", max_length=280) + + +def _build_raw_content(*, object_type: str, text: str) -> str: + prefix = OBJECT_TYPE_TO_PREFIX[object_type] + return f"{prefix}: {text}" + + +def _read_json(path: Path) -> object: + try: + return json.loads(path.read_text(encoding="utf-8")) + except json.JSONDecodeError as exc: + raise ChatGPTImportValidationError( + f"invalid JSON at {path}: {exc.msg}" + ) from exc + + +def _normalize_message_text(value: object) -> str | None: + if isinstance(value, str): + return normalize_optional_text(value) + + if isinstance(value, list): + parts: list[str] = [] + for item in value: + normalized = _normalize_message_text(item) + if normalized is None: + continue + parts.append(normalized) + if not parts: + return None + return normalize_optional_text(" ".join(parts)) + + if isinstance(value, dict): + content = as_json_object(value) + for key in ("text", "content", "message"): + normalized = _normalize_message_text(content.get(key)) + if normalized is not None: + return normalized + normalized = _normalize_message_text(content.get("parts")) + if normalized is not None: + return normalized + return None + + return None + + +def _resolve_object_type_and_text(*, text: str, type_hint: object) -> tuple[str, str]: + hinted_type = normalize_object_type(type_hint) if type_hint is not None else None + if hinted_type is not None and hinted_type != "Note": + return hinted_type, text + + lowered = text.casefold() + for prefix, object_type in _PREFIX_TO_OBJECT_TYPE: + if not lowered.startswith(prefix): + continue + stripped = normalize_optional_text(text[len(prefix) :]) + if stripped is None: + raise ChatGPTImportValidationError("ChatGPT message 
content must not be empty") + return object_type, stripped + + if hinted_type is not None: + return hinted_type, text + + return "Note", text + + +def _extract_messages_from_simple_list(messages: object) -> list[JsonObject]: + if not isinstance(messages, list): + return [] + + output: list[JsonObject] = [] + for message in messages: + if not isinstance(message, dict): + continue + output.append(as_json_object(message)) + return output + + +def _extract_messages_from_mapping(mapping: object) -> list[JsonObject]: + if not isinstance(mapping, dict): + return [] + + nodes: list[tuple[float, str, JsonObject]] = [] + for key, raw_node in mapping.items(): + if not isinstance(raw_node, dict): + continue + node = as_json_object(raw_node) + message = as_json_object(node.get("message")) + if not message: + continue + + raw_created_at = message.get("create_time", node.get("create_time")) + created_at = 0.0 + if isinstance(raw_created_at, (int, float)): + created_at = float(raw_created_at) + elif isinstance(raw_created_at, str): + try: + created_at = float(raw_created_at.strip()) + except ValueError: + created_at = 0.0 + + message_id = normalize_optional_text(message.get("id")) or normalize_optional_text(key) or "" + nodes.append((created_at, message_id, message)) + + nodes.sort(key=lambda item: (item[0], item[1])) + return [node[2] for node in nodes] + + +def _extract_conversations(payload: object) -> list[JsonObject]: + if isinstance(payload, list): + return [as_json_object(item) for item in payload if isinstance(item, dict)] + + if not isinstance(payload, dict): + raise ChatGPTImportValidationError("ChatGPT source root must be a JSON object or array") + + payload_object = as_json_object(payload) + for key in ("conversations", "items", "records"): + raw_conversations = payload_object.get(key) + if raw_conversations is None: + continue + if not isinstance(raw_conversations, list): + raise ChatGPTImportValidationError(f"{key} must be a JSON array") + return [as_json_object(item) for item in raw_conversations if isinstance(item, dict)] + + if payload_object.get("mapping") is not None or payload_object.get("messages") is not None: + return [payload_object] + + raise ChatGPTImportValidationError( + "ChatGPT payload must include one of: conversations, items, records, mapping, or messages" + ) + + +def _extract_workspace_metadata(payload: object) -> tuple[str | None, str | None, str | None]: + if not isinstance(payload, dict): + return None, None, None + + payload_object = as_json_object(payload) + fixture_id = normalize_optional_text(payload_object.get("fixture_id")) + workspace_payload = as_json_object(payload_object.get("workspace")) + + workspace_id = normalize_optional_text( + workspace_payload.get("id") + ) + workspace_name = normalize_optional_text( + workspace_payload.get("name") + ) + + return fixture_id, workspace_id, workspace_name + + +def _conversation_messages(conversation: JsonObject) -> list[JsonObject]: + messages = _extract_messages_from_simple_list(conversation.get("messages")) + if messages: + return messages + return _extract_messages_from_mapping(conversation.get("mapping")) + + +def _message_role(message: JsonObject) -> str | None: + author = as_json_object(message.get("author")) + role = normalize_optional_text(author.get("role")) + if role is not None: + return role.casefold() + direct_role = normalize_optional_text(message.get("role")) + if direct_role is None: + return None + return direct_role.casefold() + + +def _message_text(message: JsonObject) -> str | None: + content_payload 
= message.get("content") + if isinstance(content_payload, dict): + content = as_json_object(content_payload) + parts = content.get("parts") + normalized = _normalize_message_text(parts) + if normalized is not None: + return normalized + normalized = _normalize_message_text(content.get("text")) + if normalized is not None: + return normalized + + for key in ("text", "message", "content"): + normalized = _normalize_message_text(message.get(key)) + if normalized is not None: + return normalized + + return None + + +def load_chatgpt_payload(source: str | Path) -> ImporterNormalizedBatch: + source_path = Path(source).expanduser().resolve() + if not source_path.exists(): + raise ChatGPTImportValidationError(f"ChatGPT source path does not exist: {source_path}") + + source_files = [source_path] if source_path.is_file() else sorted(source_path.rglob("*.json")) + if not source_files: + raise ChatGPTImportValidationError("no ChatGPT JSON files were found at the source path") + + fixture_id: str | None = None + workspace_id: str | None = None + workspace_name: str | None = None + + items: list[ImporterNormalizedItem] = [] + + for source_file in source_files: + payload = _read_json(source_file) + + maybe_fixture_id, maybe_workspace_id, maybe_workspace_name = _extract_workspace_metadata(payload) + if fixture_id is None: + fixture_id = maybe_fixture_id + if workspace_id is None: + workspace_id = maybe_workspace_id + if workspace_name is None: + workspace_name = maybe_workspace_name + + conversations = _extract_conversations(payload) + for conversation_index, conversation in enumerate(conversations, start=1): + conversation_id = normalize_optional_text( + conversation.get("id") + ) or f"conversation-{conversation_index}" + conversation_title = normalize_optional_text(conversation.get("title")) + conversation_project = normalize_optional_text(conversation.get("project")) + conversation_person = normalize_optional_text(conversation.get("person")) + + messages = _conversation_messages(conversation) + for message_index, message in enumerate(messages, start=1): + role = _message_role(message) + if role in {"system", "assistant", "user"}: + pass + elif role is not None: + continue + + text = _message_text(message) + if text is None: + continue + + object_type, object_text = _resolve_object_type_and_text( + text=text, + type_hint=message.get("object_type"), + ) + + status = parse_optional_status(message.get("status")) or "active" + confidence = parse_optional_confidence(message.get("confidence")) + if confidence is None: + confidence = _DEFAULT_CONFIDENCE + + message_id = normalize_optional_text(message.get("id")) or f"{conversation_id}:{message_index}" + source_item_id = f"{conversation_id}:{message_id}" + + explicit_title = normalize_optional_text(message.get("title")) + if explicit_title is None: + explicit_title = conversation_title + title = _build_title( + object_type=object_type, + text=object_text, + explicit_title=explicit_title, + ) + + body_key = OBJECT_TYPE_TO_BODY_KEY[object_type] + body: JsonObject = { + body_key: object_text, + "raw_import_text": object_text, + "chatgpt_role": role, + "chatgpt_conversation_id": conversation_id, + "chatgpt_message_id": message_id, + } + + source_provenance: JsonObject = { + "thread_id": conversation_id, + "chatgpt_conversation_id": conversation_id, + "chatgpt_message_id": message_id, + } + if role is not None: + source_provenance["chatgpt_role"] = role + if conversation_project is not None: + source_provenance["project"] = conversation_project + if 
conversation_person is not None: + source_provenance["person"] = conversation_person + + source_event_ids = [f"chatgpt-event:{conversation_id}:{message_id}"] + source_provenance["source_event_ids"] = source_event_ids + + dedupe_payload = merge_json_objects( + { + "workspace_id": workspace_id or source_path.stem, + "conversation_id": conversation_id, + "message_id": message_id, + "object_type": object_type, + "status": status, + "title": title, + "body": body, + }, + source_provenance, + ) + + items.append( + ImporterNormalizedItem( + source_item_id=source_item_id, + source_file=source_file.name, + object_type=object_type, + status=status, + raw_content=_build_raw_content(object_type=object_type, text=object_text), + title=title, + body=body, + confidence=confidence, + source_provenance=source_provenance, + dedupe_key=dedupe_key_for_payload(dedupe_payload), + ) + ) + + if not items: + raise ChatGPTImportValidationError("ChatGPT source did not contain any importable messages") + + resolved_workspace_id = workspace_id or f"chatgpt-{source_path.stem}" + return ImporterNormalizedBatch( + context=ImporterWorkspaceContext( + fixture_id=fixture_id, + workspace_id=resolved_workspace_id, + workspace_name=workspace_name, + source_path=str(source_path), + ), + items=items, + ) + + +def import_chatgpt_source( + store: ContinuityStore, + *, + user_id: UUID, + source: str | Path, +) -> JsonObject: + batch = load_chatgpt_payload(source) + return import_normalized_batch( + store, + user_id=user_id, + batch=batch, + config=ImportPersistenceConfig( + source_kind="chatgpt_import", + source_prefix="chatgpt", + admission_reason="chatgpt_import", + dedupe_key_field="chatgpt_dedupe_key", + dedupe_posture=_DEFAULT_DEDUPE_POSTURE, + ), + ) + + +__all__ = ["ChatGPTImportValidationError", "import_chatgpt_source", "load_chatgpt_payload"] diff --git a/apps/api/src/alicebot_api/importer_models.py b/apps/api/src/alicebot_api/importer_models.py new file mode 100644 index 0000000..680871f --- /dev/null +++ b/apps/api/src/alicebot_api/importer_models.py @@ -0,0 +1,283 @@ +from __future__ import annotations + +from dataclasses import dataclass +from hashlib import sha256 +import json + +from alicebot_api.store import JsonObject + + +CONTINUITY_IMPORT_STATUSES = { + "active", + "stale", + "completed", + "cancelled", + "superseded", +} + +CONTINUITY_IMPORT_OBJECT_TYPES = { + "Decision", + "NextAction", + "Commitment", + "WaitingFor", + "Blocker", + "MemoryFact", + "Note", +} + +OBJECT_TYPE_TO_EXPLICIT_SIGNAL: dict[str, str] = { + "Decision": "decision", + "NextAction": "next_action", + "Commitment": "commitment", + "WaitingFor": "waiting_for", + "Blocker": "blocker", + "MemoryFact": "remember_this", + "Note": "note", +} + +OBJECT_TYPE_TO_BODY_KEY: dict[str, str] = { + "Note": "body", + "MemoryFact": "fact_text", + "Decision": "decision_text", + "Commitment": "commitment_text", + "WaitingFor": "waiting_for_text", + "Blocker": "blocking_reason", + "NextAction": "action_text", +} + +OBJECT_TYPE_TO_PREFIX: dict[str, str] = { + "Decision": "Decision", + "Commitment": "Commitment", + "WaitingFor": "Waiting For", + "Blocker": "Blocker", + "NextAction": "Next Action", + "MemoryFact": "Memory Fact", + "Note": "Note", +} + +_TYPE_ALIAS_TO_OBJECT_TYPE: dict[str, str] = { + "decision": "Decision", + "decisions": "Decision", + "task": "NextAction", + "next": "NextAction", + "next_action": "NextAction", + "nextaction": "NextAction", + "action": "NextAction", + "commitment": "Commitment", + "waiting": "WaitingFor", + "waiting_for": 
"WaitingFor", + "waitingfor": "WaitingFor", + "blocker": "Blocker", + "fact": "MemoryFact", + "memory_fact": "MemoryFact", + "memory": "MemoryFact", + "note": "Note", +} + + +class ImporterValidationError(ValueError): + """Raised when an importer source payload is invalid.""" + + +@dataclass(frozen=True, slots=True) +class ImporterWorkspaceContext: + fixture_id: str | None + workspace_id: str + workspace_name: str | None + source_path: str + + +@dataclass(frozen=True, slots=True) +class ImporterNormalizedItem: + source_item_id: str + source_file: str + object_type: str + status: str + raw_content: str + title: str + body: JsonObject + confidence: float + source_provenance: JsonObject + dedupe_key: str + + +@dataclass(frozen=True, slots=True) +class ImporterNormalizedBatch: + context: ImporterWorkspaceContext + items: list[ImporterNormalizedItem] + + +def normalize_optional_text(value: object) -> str | None: + if not isinstance(value, str): + return None + normalized = " ".join(value.split()).strip() + if normalized == "": + return None + return normalized + + +def normalize_required_text(value: object, *, field_name: str) -> str: + normalized = normalize_optional_text(value) + if normalized is None: + raise ImporterValidationError(f"{field_name} must be a non-empty string") + return normalized + + +def normalize_object_type(value: object, *, default: str = "Note") -> str: + normalized = normalize_optional_text(value) + if normalized is None: + return default + + if normalized in CONTINUITY_IMPORT_OBJECT_TYPES: + return normalized + + lowered = normalized.casefold().replace("-", "_").replace(" ", "_") + return _TYPE_ALIAS_TO_OBJECT_TYPE.get(lowered, default) + + +def parse_optional_confidence(value: object) -> float | None: + if value is None: + return None + + if isinstance(value, bool): + raise ImporterValidationError("confidence must be a number") + + if isinstance(value, (int, float)): + parsed = float(value) + elif isinstance(value, str): + stripped = value.strip() + if stripped == "": + return None + try: + parsed = float(stripped) + except ValueError as exc: + raise ImporterValidationError("confidence must be a number") from exc + else: + raise ImporterValidationError("confidence must be a number") + + if parsed < 0.0 or parsed > 1.0: + raise ImporterValidationError("confidence must be between 0.0 and 1.0") + return parsed + + +def parse_optional_status(value: object) -> str | None: + normalized = normalize_optional_text(value) + if normalized is None: + return None + lowered = normalized.casefold() + if lowered not in CONTINUITY_IMPORT_STATUSES: + supported = ", ".join(sorted(CONTINUITY_IMPORT_STATUSES)) + raise ImporterValidationError( + f"status must be one of: {supported}" + ) + return lowered + + +def ensure_json_object(value: object, *, field_name: str) -> JsonObject: + if not isinstance(value, dict): + raise ImporterValidationError(f"{field_name} must be a JSON object") + return value + + +def canonicalize_json(value: object) -> object: + if isinstance(value, dict): + return { + str(key): canonicalize_json(value[key]) + for key in sorted(value) + } + if isinstance(value, list): + return [canonicalize_json(item) for item in value] + return value + + +def canonical_json_string(value: object) -> str: + return json.dumps( + canonicalize_json(value), + sort_keys=True, + separators=(",", ":"), + ensure_ascii=True, + ) + + +def dedupe_key_for_payload(value: object) -> str: + return sha256(canonical_json_string(value).encode("utf-8")).hexdigest() + + +def as_json_object(value: object) 
-> JsonObject: + if not isinstance(value, dict): + return {} + output: JsonObject = {} + for key, child in value.items(): + if not isinstance(key, str): + continue + output[key] = _as_json_value(child) + return output + + +def _as_json_value(value: object): + if value is None or isinstance(value, (str, int, float, bool)): + return value + if isinstance(value, list): + return [_as_json_value(item) for item in value] + if isinstance(value, dict): + return as_json_object(value) + return str(value) + + +def merge_json_objects(*payloads: JsonObject) -> JsonObject: + merged: JsonObject = {} + for payload in payloads: + merged.update(payload) + return merged + + +def pick_first_text(*candidates: object) -> str | None: + for candidate in candidates: + normalized = normalize_optional_text(candidate) + if normalized is not None: + return normalized + return None + + +def to_string_list(value: object) -> list[str]: + if isinstance(value, str): + normalized = normalize_optional_text(value) + return [] if normalized is None else [normalized] + + if isinstance(value, list): + items: list[str] = [] + seen: set[str] = set() + for raw in value: + normalized = normalize_optional_text(raw) + if normalized is None or normalized in seen: + continue + items.append(normalized) + seen.add(normalized) + return items + + return [] + + +__all__ = [ + "CONTINUITY_IMPORT_OBJECT_TYPES", + "CONTINUITY_IMPORT_STATUSES", + "ImporterNormalizedBatch", + "ImporterNormalizedItem", + "ImporterValidationError", + "ImporterWorkspaceContext", + "OBJECT_TYPE_TO_BODY_KEY", + "OBJECT_TYPE_TO_EXPLICIT_SIGNAL", + "OBJECT_TYPE_TO_PREFIX", + "as_json_object", + "canonical_json_string", + "dedupe_key_for_payload", + "ensure_json_object", + "merge_json_objects", + "normalize_object_type", + "normalize_optional_text", + "normalize_required_text", + "parse_optional_confidence", + "parse_optional_status", + "pick_first_text", + "to_string_list", +] diff --git a/apps/api/src/alicebot_api/importers/__init__.py b/apps/api/src/alicebot_api/importers/__init__.py new file mode 100644 index 0000000..80b4c9a --- /dev/null +++ b/apps/api/src/alicebot_api/importers/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from alicebot_api.importers.common import ImportPersistenceConfig, import_normalized_batch + + +__all__ = ["ImportPersistenceConfig", "import_normalized_batch"] diff --git a/apps/api/src/alicebot_api/importers/common.py b/apps/api/src/alicebot_api/importers/common.py new file mode 100644 index 0000000..d343f26 --- /dev/null +++ b/apps/api/src/alicebot_api/importers/common.py @@ -0,0 +1,158 @@ +from __future__ import annotations + +from dataclasses import dataclass +from uuid import UUID + +from alicebot_api.importer_models import ( + ImporterNormalizedBatch, + OBJECT_TYPE_TO_EXPLICIT_SIGNAL, + to_string_list, +) +from alicebot_api.store import ContinuityStore, JsonObject + + +@dataclass(frozen=True, slots=True) +class ImportPersistenceConfig: + source_kind: str + source_prefix: str + admission_reason: str + dedupe_key_field: str + dedupe_posture: str + + +def _existing_dedupe_keys( + store: ContinuityStore, + *, + source_kind: str, + dedupe_key_field: str, +) -> set[str]: + dedupe_keys: set[str] = set() + for row in store.list_continuity_recall_candidates(): + provenance = row["provenance"] + if not isinstance(provenance, dict): + continue + if provenance.get("source_kind") != source_kind: + continue + dedupe_key = provenance.get(dedupe_key_field) + if isinstance(dedupe_key, str) and dedupe_key.strip() != "": + 
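# only non-empty string keys from matching provenance count as prior imports +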
dedupe_keys.add(dedupe_key) + return dedupe_keys + + +def _deterministic_source_event_id(*, source_kind: str, workspace_id: str, source_item_id: str) -> str: + return f"{source_kind}:{workspace_id}:{source_item_id}" + + +def _build_provenance( + *, + batch: ImporterNormalizedBatch, + source_file: str, + source_item_id: str, + source_provenance: JsonObject, + source_dedupe_key: str, + source_event_ids: list[str], + config: ImportPersistenceConfig, +) -> JsonObject: + source_prefix = config.source_prefix + return { + **source_provenance, + "source_event_ids": source_event_ids, + "source_kind": config.source_kind, + f"{source_prefix}_workspace_id": batch.context.workspace_id, + f"{source_prefix}_workspace_name": batch.context.workspace_name, + f"{source_prefix}_fixture_id": batch.context.fixture_id, + f"{source_prefix}_source_path": batch.context.source_path, + f"{source_prefix}_source_file": source_file, + f"{source_prefix}_source_item_id": source_item_id, + config.dedupe_key_field: source_dedupe_key, + f"{source_prefix}_dedupe_posture": config.dedupe_posture, + } + + +def import_normalized_batch( + store: ContinuityStore, + *, + user_id: UUID, + batch: ImporterNormalizedBatch, + config: ImportPersistenceConfig, +) -> JsonObject: + del user_id + + existing_dedupe_keys = _existing_dedupe_keys( + store, + source_kind=config.source_kind, + dedupe_key_field=config.dedupe_key_field, + ) + run_dedupe_keys: set[str] = set() + + imported_object_ids: list[str] = [] + imported_capture_ids: list[str] = [] + skipped_duplicates = 0 + + for item in batch.items: + if item.dedupe_key in existing_dedupe_keys or item.dedupe_key in run_dedupe_keys: + skipped_duplicates += 1 + continue + + run_dedupe_keys.add(item.dedupe_key) + + capture = store.create_continuity_capture_event( + raw_content=item.raw_content, + explicit_signal=OBJECT_TYPE_TO_EXPLICIT_SIGNAL[item.object_type], + admission_posture="DERIVED", + admission_reason=config.admission_reason, + ) + + source_event_ids = to_string_list(item.source_provenance.get("source_event_ids")) + if not source_event_ids: + source_event_ids = [ + _deterministic_source_event_id( + source_kind=config.source_kind, + workspace_id=batch.context.workspace_id, + source_item_id=item.source_item_id, + ) + ] + + provenance = _build_provenance( + batch=batch, + source_file=item.source_file, + source_item_id=item.source_item_id, + source_provenance=item.source_provenance, + source_dedupe_key=item.dedupe_key, + source_event_ids=source_event_ids, + config=config, + ) + + continuity_object = store.create_continuity_object( + capture_event_id=capture["id"], + object_type=item.object_type, + status=item.status, + title=item.title, + body=item.body, + provenance=provenance, + confidence=item.confidence, + ) + + imported_capture_ids.append(str(capture["id"])) + imported_object_ids.append(str(continuity_object["id"])) + + imported_count = len(imported_object_ids) + status = "ok" if imported_count > 0 else "noop" + + return { + "status": status, + "source_path": batch.context.source_path, + "fixture_id": batch.context.fixture_id, + "workspace_id": batch.context.workspace_id, + "workspace_name": batch.context.workspace_name, + "total_candidates": len(batch.items), + "imported_count": imported_count, + "skipped_duplicates": skipped_duplicates, + "dedupe_posture": config.dedupe_posture, + "provenance_source_kind": config.source_kind, + "imported_capture_event_ids": imported_capture_ids, + "imported_object_ids": imported_object_ids, + } + + +__all__ = ["ImportPersistenceConfig", 
"import_normalized_batch"] diff --git a/apps/api/src/alicebot_api/markdown_import.py b/apps/api/src/alicebot_api/markdown_import.py new file mode 100644 index 0000000..c8634e3 --- /dev/null +++ b/apps/api/src/alicebot_api/markdown_import.py @@ -0,0 +1,335 @@ +from __future__ import annotations + +from pathlib import Path +import re +from uuid import UUID + +from alicebot_api.importer_models import ( + ImporterNormalizedBatch, + ImporterNormalizedItem, + ImporterValidationError, + ImporterWorkspaceContext, + OBJECT_TYPE_TO_BODY_KEY, + OBJECT_TYPE_TO_PREFIX, + dedupe_key_for_payload, + merge_json_objects, + normalize_object_type, + normalize_optional_text, + parse_optional_confidence, + parse_optional_status, +) +from alicebot_api.importers.common import ImportPersistenceConfig, import_normalized_batch +from alicebot_api.store import ContinuityStore, JsonObject + + +_DEFAULT_CONFIDENCE = 0.84 +_DEFAULT_DEDUPE_POSTURE = "workspace_and_line_fingerprint" +_PREFIX_TO_OBJECT_TYPE: tuple[tuple[str, str], ...] = ( + ("decision:", "Decision"), + ("next action:", "NextAction"), + ("next:", "NextAction"), + ("task:", "NextAction"), + ("commitment:", "Commitment"), + ("waiting for:", "WaitingFor"), + ("blocker:", "Blocker"), + ("fact:", "MemoryFact"), + ("remember:", "MemoryFact"), + ("note:", "Note"), +) + + +class MarkdownImportValidationError(ImporterValidationError): + """Raised when a markdown import payload is invalid.""" + + +def _truncate(value: str, *, max_length: int) -> str: + if len(value) <= max_length: + return value + return value[: max_length - 3].rstrip() + "..." + + +def _build_title(*, object_type: str, text: str, explicit_title: str | None) -> str: + if explicit_title is not None: + return _truncate(explicit_title, max_length=280) + prefix = OBJECT_TYPE_TO_PREFIX[object_type] + return _truncate(f"{prefix}: {text}", max_length=280) + + +def _build_raw_content(*, object_type: str, text: str) -> str: + prefix = OBJECT_TYPE_TO_PREFIX[object_type] + return f"{prefix}: {text}" + + +def _strip_list_prefix(line: str) -> str: + stripped = line.strip() + if stripped.startswith("- ") or stripped.startswith("* "): + return stripped[2:].strip() + numbered = re.match(r"^\d+\.\s+(.*)$", stripped) + if numbered: + return numbered.group(1).strip() + return stripped + + +def _parse_frontmatter(raw_text: str) -> tuple[dict[str, str], list[str]]: + lines = raw_text.splitlines() + if not lines or lines[0].strip() != "---": + return {}, lines + + metadata: dict[str, str] = {} + closing_index = -1 + for index in range(1, len(lines)): + line = lines[index].strip() + if line == "---": + closing_index = index + break + if line == "" or line.startswith("#"): + continue + if ":" not in line: + raise MarkdownImportValidationError("frontmatter lines must use key: value format") + key, value = line.split(":", 1) + normalized_key = normalize_optional_text(key) + normalized_value = normalize_optional_text(value) + if normalized_key is None or normalized_value is None: + continue + metadata[normalized_key.casefold().replace("-", "_")] = normalized_value + + if closing_index == -1: + raise MarkdownImportValidationError("markdown frontmatter must be closed with ---") + + return metadata, lines[closing_index + 1 :] + + +def _read_markdown_source(source: str | Path) -> tuple[Path, list[Path]]: + source_path = Path(source).expanduser().resolve() + if not source_path.exists(): + raise MarkdownImportValidationError(f"markdown source path does not exist: {source_path}") + + if source_path.is_file(): + if 
source_path.suffix.casefold() != ".md": + raise MarkdownImportValidationError("markdown source file must end with .md") + return source_path, [source_path] + + files = sorted( + path + for path in source_path.rglob("*.md") + if path.is_file() + ) + if not files: + raise MarkdownImportValidationError(f"no markdown files were found at {source_path}") + return source_path, files + + +def _resolve_object_type_and_text(*, text: str, type_hint: str | None) -> tuple[str, str]: + if type_hint is not None: + return normalize_object_type(type_hint), text + + lowered = text.casefold() + for prefix, object_type in _PREFIX_TO_OBJECT_TYPE: + if not lowered.startswith(prefix): + continue + stripped = normalize_optional_text(text[len(prefix) :]) + if stripped is None: + raise MarkdownImportValidationError("markdown entry content must not be empty") + return object_type, stripped + + return "Note", text + + +def _parse_line_tags(line: str) -> tuple[str, dict[str, str]]: + segments = [segment.strip() for segment in line.split("|")] + text_segment = segments[0] + tags: dict[str, str] = {} + for segment in segments[1:]: + if "=" not in segment: + continue + key, value = segment.split("=", 1) + normalized_key = normalize_optional_text(key) + normalized_value = normalize_optional_text(value) + if normalized_key is None or normalized_value is None: + continue + tags[normalized_key.casefold().replace("-", "_")] = normalized_value + return text_segment, tags + + +def _merge_source_event_ids(*, existing: list[str], maybe_csv: str | None, single: str | None) -> list[str]: + output = list(existing) + seen = set(output) + + if maybe_csv is not None: + for part in maybe_csv.split(","): + normalized = normalize_optional_text(part) + if normalized is None or normalized in seen: + continue + output.append(normalized) + seen.add(normalized) + + normalized_single = normalize_optional_text(single) + if normalized_single is not None and normalized_single not in seen: + output.append(normalized_single) + + return output + + +def load_markdown_payload(source: str | Path) -> ImporterNormalizedBatch: + source_path, markdown_files = _read_markdown_source(source) + + fixture_id: str | None = None + workspace_id: str | None = None + workspace_name: str | None = None + default_status: str = "active" + default_confidence = _DEFAULT_CONFIDENCE + default_scope: JsonObject = {} + + items: list[ImporterNormalizedItem] = [] + + for file_path in markdown_files: + raw_text = file_path.read_text(encoding="utf-8") + metadata, lines = _parse_frontmatter(raw_text) + + if fixture_id is None: + fixture_id = normalize_optional_text(metadata.get("fixture_id")) + if workspace_id is None: + workspace_id = normalize_optional_text(metadata.get("workspace_id")) + if workspace_name is None: + workspace_name = normalize_optional_text(metadata.get("workspace_name")) + + maybe_default_status = parse_optional_status(metadata.get("default_status")) + if maybe_default_status is not None: + default_status = maybe_default_status + + maybe_default_confidence = parse_optional_confidence(metadata.get("default_confidence")) + if maybe_default_confidence is not None: + default_confidence = maybe_default_confidence + + file_scope: JsonObject = {} + for key in ("thread_id", "task_id", "project", "person", "confirmation_status"): + value = normalize_optional_text(metadata.get(key)) + if value is not None: + file_scope[key] = value if key != "confirmation_status" else value.casefold() + + for line_number, raw_line in enumerate(lines, start=1): + stripped = 
_strip_list_prefix(raw_line) + normalized_line = normalize_optional_text(stripped) + if normalized_line is None: + continue + if normalized_line.startswith("#"): + continue + + content_segment, tags = _parse_line_tags(normalized_line) + normalized_content = normalize_optional_text(content_segment) + if normalized_content is None: + continue + + object_type, object_text = _resolve_object_type_and_text( + text=normalized_content, + type_hint=tags.get("type"), + ) + status = parse_optional_status(tags.get("status")) or default_status + confidence = parse_optional_confidence(tags.get("confidence")) + if confidence is None: + confidence = default_confidence + + source_item_id = normalize_optional_text(tags.get("id")) or f"{file_path.name}:{line_number}" + title = _build_title( + object_type=object_type, + text=object_text, + explicit_title=normalize_optional_text(tags.get("title")), + ) + + body_key = OBJECT_TYPE_TO_BODY_KEY[object_type] + body: JsonObject = { + body_key: object_text, + "raw_import_text": object_text, + "markdown_raw_line": raw_line, + "markdown_line_number": line_number, + "markdown_source_file": file_path.name, + } + + source_provenance = merge_json_objects( + default_scope, + file_scope, + { + "markdown_source_relpath": str(file_path.relative_to(source_path)) + if source_path.is_dir() + else file_path.name, + }, + ) + + for key in ("thread_id", "task_id", "project", "person", "confirmation_status"): + value = normalize_optional_text(tags.get(key)) + if value is None: + continue + source_provenance[key] = value if key != "confirmation_status" else value.casefold() + + source_event_ids = _merge_source_event_ids( + existing=[], + maybe_csv=tags.get("source_event_ids"), + single=tags.get("source_event_id"), + ) + if source_event_ids: + source_provenance["source_event_ids"] = source_event_ids + + dedupe_payload: JsonObject = { + "workspace_id": workspace_id or source_path.stem, + "object_type": object_type, + "status": status, + "title": title, + "body": { + body_key: object_text, + "raw_import_text": object_text, + }, + "source_provenance": source_provenance, + } + + items.append( + ImporterNormalizedItem( + source_item_id=source_item_id, + source_file=file_path.name, + object_type=object_type, + status=status, + raw_content=_build_raw_content(object_type=object_type, text=object_text), + title=title, + body=body, + confidence=confidence, + source_provenance=source_provenance, + dedupe_key=dedupe_key_for_payload(dedupe_payload), + ) + ) + + resolved_workspace_id = workspace_id or source_path.stem + if not items: + raise MarkdownImportValidationError("markdown source did not contain any importable entries") + + return ImporterNormalizedBatch( + context=ImporterWorkspaceContext( + fixture_id=fixture_id, + workspace_id=resolved_workspace_id, + workspace_name=workspace_name, + source_path=str(source_path), + ), + items=items, + ) + + +def import_markdown_source( + store: ContinuityStore, + *, + user_id: UUID, + source: str | Path, +) -> JsonObject: + batch = load_markdown_payload(source) + return import_normalized_batch( + store, + user_id=user_id, + batch=batch, + config=ImportPersistenceConfig( + source_kind="markdown_import", + source_prefix="markdown", + admission_reason="markdown_import", + dedupe_key_field="markdown_dedupe_key", + dedupe_posture=_DEFAULT_DEDUPE_POSTURE, + ), + ) + + +__all__ = ["MarkdownImportValidationError", "import_markdown_source", "load_markdown_payload"] diff --git a/apps/api/src/alicebot_api/openclaw_import.py 
b/apps/api/src/alicebot_api/openclaw_import.py index 5fb529a..6b5f9ff 100644 --- a/apps/api/src/alicebot_api/openclaw_import.py +++ b/apps/api/src/alicebot_api/openclaw_import.py @@ -3,37 +3,44 @@ from pathlib import Path from uuid import UUID +from alicebot_api.importer_models import ( + ImporterNormalizedBatch, + ImporterNormalizedItem, + ImporterWorkspaceContext, +) +from alicebot_api.importers.common import ImportPersistenceConfig, import_normalized_batch from alicebot_api.openclaw_adapter import load_openclaw_payload from alicebot_api.store import ContinuityStore, JsonObject -_OBJECT_TYPE_TO_SIGNAL: dict[str, str] = { - "Decision": "decision", - "NextAction": "next_action", - "Commitment": "commitment", - "WaitingFor": "waiting_for", - "Blocker": "blocker", - "MemoryFact": "remember_this", - "Note": "note", -} +_OPENCLAW_DEDUPE_POSTURE = "workspace_and_payload_fingerprint" -def _existing_openclaw_dedupe_keys(store: ContinuityStore) -> set[str]: - dedupe_keys: set[str] = set() - for row in store.list_continuity_recall_candidates(): - provenance = row["provenance"] - if not isinstance(provenance, dict): - continue - if provenance.get("source_kind") != "openclaw_import": - continue - dedupe_key = provenance.get("openclaw_dedupe_key") - if isinstance(dedupe_key, str) and dedupe_key.strip() != "": - dedupe_keys.add(dedupe_key) - return dedupe_keys - - -def _deterministic_source_event_id(*, workspace_id: str, source_item_id: str) -> str: - return f"openclaw:{workspace_id}:{source_item_id}" +def _to_generic_batch(source: str | Path) -> ImporterNormalizedBatch: + batch = load_openclaw_payload(source) + return ImporterNormalizedBatch( + context=ImporterWorkspaceContext( + fixture_id=batch.context.fixture_id, + workspace_id=batch.context.workspace_id, + workspace_name=batch.context.workspace_name, + source_path=batch.context.source_path, + ), + items=[ + ImporterNormalizedItem( + source_item_id=item.source_item_id, + source_file=item.source_file, + object_type=item.object_type, + status=item.status, + raw_content=item.raw_content, + title=item.title, + body=item.body, + confidence=item.confidence, + source_provenance=item.source_provenance, + dedupe_key=item.dedupe_key, + ) + for item in batch.items + ], + ) def import_openclaw_source( @@ -42,83 +49,19 @@ def import_openclaw_source( user_id: UUID, source: str | Path, ) -> JsonObject: - del user_id - - batch = load_openclaw_payload(source) - existing_dedupe_keys = _existing_openclaw_dedupe_keys(store) - run_dedupe_keys: set[str] = set() - - imported_object_ids: list[str] = [] - imported_capture_ids: list[str] = [] - skipped_duplicates = 0 - - for item in batch.items: - if item.dedupe_key in existing_dedupe_keys or item.dedupe_key in run_dedupe_keys: - skipped_duplicates += 1 - continue - - run_dedupe_keys.add(item.dedupe_key) - - capture = store.create_continuity_capture_event( - raw_content=item.raw_content, - explicit_signal=_OBJECT_TYPE_TO_SIGNAL[item.object_type], - admission_posture="DERIVED", + generic_batch = _to_generic_batch(source) + return import_normalized_batch( + store, + user_id=user_id, + batch=generic_batch, + config=ImportPersistenceConfig( + source_kind="openclaw_import", + source_prefix="openclaw", admission_reason="openclaw_import", - ) - - source_event_ids = item.source_provenance.get("source_event_ids") - if not isinstance(source_event_ids, list) or len(source_event_ids) == 0: - source_event_ids = [ - _deterministic_source_event_id( - workspace_id=batch.context.workspace_id, - source_item_id=item.source_item_id, - ) - ] - 
- provenance: JsonObject = { - **item.source_provenance, - "source_event_ids": source_event_ids, - "source_kind": "openclaw_import", - "openclaw_workspace_id": batch.context.workspace_id, - "openclaw_workspace_name": batch.context.workspace_name, - "openclaw_fixture_id": batch.context.fixture_id, - "openclaw_source_path": batch.context.source_path, - "openclaw_source_file": item.source_file, - "openclaw_source_item_id": item.source_item_id, - "openclaw_dedupe_key": item.dedupe_key, - "openclaw_dedupe_posture": "workspace_and_payload_fingerprint", - } - - continuity_object = store.create_continuity_object( - capture_event_id=capture["id"], - object_type=item.object_type, - status=item.status, - title=item.title, - body=item.body, - provenance=provenance, - confidence=item.confidence, - ) - - imported_capture_ids.append(str(capture["id"])) - imported_object_ids.append(str(continuity_object["id"])) - - imported_count = len(imported_object_ids) - status = "ok" if imported_count > 0 else "noop" - - return { - "status": status, - "source_path": batch.context.source_path, - "fixture_id": batch.context.fixture_id, - "workspace_id": batch.context.workspace_id, - "workspace_name": batch.context.workspace_name, - "total_candidates": len(batch.items), - "imported_count": imported_count, - "skipped_duplicates": skipped_duplicates, - "dedupe_posture": "workspace_and_payload_fingerprint", - "provenance_source_kind": "openclaw_import", - "imported_capture_event_ids": imported_capture_ids, - "imported_object_ids": imported_object_ids, - } + dedupe_key_field="openclaw_dedupe_key", + dedupe_posture=_OPENCLAW_DEDUPE_POSTURE, + ), + ) __all__ = ["import_openclaw_source"] diff --git a/apps/api/src/alicebot_api/retrieval_evaluation.py b/apps/api/src/alicebot_api/retrieval_evaluation.py index df3a857..c3ea514 100644 --- a/apps/api/src/alicebot_api/retrieval_evaluation.py +++ b/apps/api/src/alicebot_api/retrieval_evaluation.py @@ -2,20 +2,27 @@ from dataclasses import dataclass from datetime import UTC, datetime +import json +from pathlib import Path +from typing import Callable from uuid import UUID +from alicebot_api.continuity_resumption import compile_continuity_resumption_brief +from alicebot_api.continuity_review import apply_continuity_correction from alicebot_api.continuity_recall import query_continuity_recall from alicebot_api.contracts import ( RETRIEVAL_EVALUATION_FIXTURE_ORDER, RETRIEVAL_EVALUATION_RESULT_ORDER, + ContinuityCorrectionInput, ContinuityRecallQueryInput, + ContinuityResumptionBriefRequestInput, RetrievalEvaluationStatus, RetrievalEvaluationFixtureResult, RetrievalEvaluationResponse, RetrievalEvaluationSummary, ) from alicebot_api.semantic_retrieval import calculate_mean_precision, calculate_precision_at_k -from alicebot_api.store import ContinuityRecallCandidateRow, ContinuityStore +from alicebot_api.store import ContinuityRecallCandidateRow, ContinuityStore, JsonObject RETRIEVAL_EVALUATION_PRECISION_TARGET = 0.8 @@ -275,3 +282,420 @@ def get_retrieval_evaluation_summary( "fixtures": evaluated_results, "summary": summary, } + + +PHASE9_EVALUATION_SCHEMA_VERSION = "phase9_eval_v1" +PHASE9_EVALUATION_PASS_THRESHOLD = 1.0 + + +@dataclass(frozen=True, slots=True) +class Phase9ImporterDefinition: + importer_name: str + source_kind: str + source_path: Path + project: str + thread_id: UUID | None + recall_query: str + import_fn: Callable[[ContinuityStore, UUID, Path], JsonObject] + + +def _repo_root() -> Path: + return Path(__file__).resolve().parents[4] + + +def _as_int(value: object) -> int: 
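+    # NOTE: bool is an int subclass, so it is handled before the generic int branch.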
+ if isinstance(value, bool): + return int(value) + if isinstance(value, int): + return value + if isinstance(value, float): + return int(value) + if isinstance(value, str): + try: + return int(value.strip()) + except ValueError: + return 0 + return 0 + + +def calculate_phase9_metric_ratio(*, passed_count: int, total_count: int) -> float: + if total_count <= 0: + return 0.0 + return passed_count / total_count + + +def _build_phase9_importer_definitions( + *, + openclaw_source: str | Path | None, + markdown_source: str | Path | None, + chatgpt_source: str | Path | None, +) -> tuple[Phase9ImporterDefinition, ...]: + from alicebot_api.chatgpt_import import import_chatgpt_source + from alicebot_api.markdown_import import import_markdown_source + from alicebot_api.openclaw_import import import_openclaw_source + + repo_root = _repo_root() + resolved_openclaw = Path(openclaw_source) if openclaw_source is not None else ( + repo_root / "fixtures" / "openclaw" / "workspace_v1.json" + ) + resolved_markdown = Path(markdown_source) if markdown_source is not None else ( + repo_root / "fixtures" / "importers" / "markdown" / "workspace_v1.md" + ) + resolved_chatgpt = Path(chatgpt_source) if chatgpt_source is not None else ( + repo_root / "fixtures" / "importers" / "chatgpt" / "workspace_v1.json" + ) + + return ( + Phase9ImporterDefinition( + importer_name="openclaw", + source_kind="openclaw_import", + source_path=resolved_openclaw, + project="Alice Public Core", + thread_id=UUID("cccccccc-cccc-4ccc-8ccc-cccccccccccc"), + recall_query="MCP tool surface", + import_fn=lambda store, user_id, path: import_openclaw_source( + store, + user_id=user_id, + source=path, + ), + ), + Phase9ImporterDefinition( + importer_name="markdown", + source_kind="markdown_import", + source_path=resolved_markdown, + project="Markdown Import Project", + thread_id=UUID("eeeeeeee-eeee-4eee-8eee-eeeeeeeeeeee"), + recall_query="markdown importer deterministic", + import_fn=lambda store, user_id, path: import_markdown_source( + store, + user_id=user_id, + source=path, + ), + ), + Phase9ImporterDefinition( + importer_name="chatgpt", + source_kind="chatgpt_import", + source_path=resolved_chatgpt, + project="ChatGPT Import Project", + thread_id=UUID("bbbbbbbb-bbbb-4bbb-8bbb-bbbbbbbbbbbb"), + recall_query="ChatGPT import provenance explicit", + import_fn=lambda store, user_id, path: import_chatgpt_source( + store, + user_id=user_id, + source=path, + ), + ), + ) + + +def _run_phase9_importer_evidence( + store: ContinuityStore, + *, + user_id: UUID, + definitions: tuple[Phase9ImporterDefinition, ...], +) -> list[JsonObject]: + evidence: list[JsonObject] = [] + for definition in definitions: + first_run = definition.import_fn(store, user_id, definition.source_path) + second_run = definition.import_fn(store, user_id, definition.source_path) + + import_success = ( + first_run.get("status") == "ok" + and _as_int(first_run.get("imported_count")) > 0 + ) + duplicate_posture_ok = ( + second_run.get("status") == "noop" + and _as_int(second_run.get("skipped_duplicates")) == _as_int(first_run.get("total_candidates")) + ) + evidence.append( + { + "importer": definition.importer_name, + "source_kind": definition.source_kind, + "source_path": str(definition.source_path.expanduser().resolve()), + "first_run": first_run, + "second_run": second_run, + "import_success": import_success, + "duplicate_posture_ok": duplicate_posture_ok, + } + ) + return evidence + + +def _run_phase9_recall_precision( + store: ContinuityStore, + *, + user_id: UUID, + definitions: 
tuple[Phase9ImporterDefinition, ...], +) -> tuple[list[JsonObject], float]: + checks: list[JsonObject] = [] + hit_count = 0 + + for definition in definitions: + payload = query_continuity_recall( + store, + user_id=user_id, + request=ContinuityRecallQueryInput( + query=definition.recall_query, + thread_id=definition.thread_id, + project=definition.project, + limit=5, + ), + ) + top_item = payload["items"][0] if payload["items"] else None + top_source_kind = None + if top_item is not None and isinstance(top_item.get("provenance"), dict): + top_source_kind = top_item["provenance"].get("source_kind") + + hit = top_source_kind == definition.source_kind + if hit: + hit_count += 1 + + checks.append( + { + "importer": definition.importer_name, + "query": definition.recall_query, + "expected_source_kind": definition.source_kind, + "top_source_kind": top_source_kind, + "returned_count": payload["summary"]["returned_count"], + "hit": hit, + } + ) + + precision = calculate_phase9_metric_ratio( + passed_count=hit_count, + total_count=len(definitions), + ) + return checks, precision + + +def _run_phase9_resumption_usefulness( + store: ContinuityStore, + *, + user_id: UUID, + definitions: tuple[Phase9ImporterDefinition, ...], +) -> tuple[list[JsonObject], float]: + checks: list[JsonObject] = [] + useful_count = 0 + + for definition in definitions: + payload = compile_continuity_resumption_brief( + store, + user_id=user_id, + request=ContinuityResumptionBriefRequestInput( + query=None, + thread_id=definition.thread_id, + project=definition.project, + max_recent_changes=5, + max_open_loops=5, + ), + ) + brief = payload["brief"] + last_decision = brief["last_decision"]["item"] + next_action = brief["next_action"]["item"] + last_source_kind = ( + None + if last_decision is None + else last_decision["provenance"].get("source_kind") + ) + next_source_kind = ( + None + if next_action is None + else next_action["provenance"].get("source_kind") + ) + useful = ( + last_decision is not None + and next_action is not None + and last_source_kind == definition.source_kind + and next_source_kind == definition.source_kind + ) + if useful: + useful_count += 1 + checks.append( + { + "importer": definition.importer_name, + "expected_source_kind": definition.source_kind, + "last_decision_source_kind": last_source_kind, + "next_action_source_kind": next_source_kind, + "useful": useful, + } + ) + + usefulness_rate = calculate_phase9_metric_ratio( + passed_count=useful_count, + total_count=len(definitions), + ) + return checks, usefulness_rate + + +def _run_phase9_correction_effectiveness( + store: ContinuityStore, + *, + user_id: UUID, + target_definition: Phase9ImporterDefinition, +) -> JsonObject: + before = query_continuity_recall( + store, + user_id=user_id, + request=ContinuityRecallQueryInput( + query=target_definition.recall_query, + thread_id=target_definition.thread_id, + project=target_definition.project, + limit=5, + ), + ) + if not before["items"]: + return { + "target_importer": target_definition.importer_name, + "effective": False, + "reason": "no_recall_items_before_correction", + } + + before_top = before["items"][0] + before_top_id = str(before_top["id"]) + before_provenance = before_top.get("provenance") + replacement_provenance = ( + dict(before_provenance) + if isinstance(before_provenance, dict) + else {} + ) + replacement_provenance["phase9_eval_correction"] = "supersede_verification" + + correction = apply_continuity_correction( + store, + user_id=user_id, + continuity_object_id=UUID(before_top_id), + 
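# supersede the current top recall hit; recall afterwards should surface the replacement +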
request=ContinuityCorrectionInput( + action="supersede", + reason="phase9_eval_correction_effectiveness", + replacement_title="Decision: Keep MCP tool surface narrow after correction verification.", + replacement_body={ + "decision_text": "Keep MCP tool surface narrow after correction verification.", + }, + replacement_provenance=replacement_provenance, + replacement_confidence=0.99, + ), + ) + + replacement_object = correction["replacement_object"] + replacement_id = None if replacement_object is None else replacement_object["id"] + + after = query_continuity_recall( + store, + user_id=user_id, + request=ContinuityRecallQueryInput( + query=target_definition.recall_query, + thread_id=target_definition.thread_id, + project=target_definition.project, + limit=5, + ), + ) + after_top_id = None if not after["items"] else str(after["items"][0]["id"]) + effective = ( + replacement_id is not None + and after_top_id == replacement_id + and after_top_id != before_top_id + ) + + return { + "target_importer": target_definition.importer_name, + "before_top_id": before_top_id, + "replacement_id": replacement_id, + "after_top_id": after_top_id, + "effective": effective, + } + + +def run_phase9_evaluation( + store: ContinuityStore, + *, + user_id: UUID, + openclaw_source: str | Path | None = None, + markdown_source: str | Path | None = None, + chatgpt_source: str | Path | None = None, +) -> JsonObject: + definitions = _build_phase9_importer_definitions( + openclaw_source=openclaw_source, + markdown_source=markdown_source, + chatgpt_source=chatgpt_source, + ) + + importer_runs = _run_phase9_importer_evidence( + store, + user_id=user_id, + definitions=definitions, + ) + recall_checks, recall_precision = _run_phase9_recall_precision( + store, + user_id=user_id, + definitions=definitions, + ) + resumption_checks, resumption_usefulness = _run_phase9_resumption_usefulness( + store, + user_id=user_id, + definitions=definitions, + ) + correction_check = _run_phase9_correction_effectiveness( + store, + user_id=user_id, + target_definition=definitions[0], + ) + + importer_success_count = sum(1 for run in importer_runs if run["import_success"] is True) + duplicate_posture_count = sum(1 for run in importer_runs if run["duplicate_posture_ok"] is True) + importer_total = len(importer_runs) + + importer_success_rate = calculate_phase9_metric_ratio( + passed_count=importer_success_count, + total_count=importer_total, + ) + duplicate_posture_rate = calculate_phase9_metric_ratio( + passed_count=duplicate_posture_count, + total_count=importer_total, + ) + correction_effectiveness_rate = 1.0 if correction_check["effective"] is True else 0.0 + + threshold = PHASE9_EVALUATION_PASS_THRESHOLD + status = ( + "pass" + if ( + importer_success_rate >= threshold + and duplicate_posture_rate >= threshold + and recall_precision >= threshold + and resumption_usefulness >= threshold + and correction_effectiveness_rate >= threshold + ) + else "fail" + ) + + return { + "schema_version": PHASE9_EVALUATION_SCHEMA_VERSION, + "generated_at": datetime.now(UTC).isoformat(), + "summary": { + "status": status, + "importer_count": importer_total, + "importer_success_rate": importer_success_rate, + "duplicate_posture_rate": duplicate_posture_rate, + "recall_precision_at_1": recall_precision, + "resumption_usefulness_rate": resumption_usefulness, + "correction_effectiveness_rate": correction_effectiveness_rate, + "pass_threshold": threshold, + }, + "importer_runs": importer_runs, + "recall_precision_checks": recall_checks, + 
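# evidence sections below mirror the committed baseline layout in eval/baselines/phase9_s37_baseline.json +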
"resumption_usefulness_checks": resumption_checks, + "correction_effectiveness": correction_check, + } + + +def write_phase9_evaluation_report( + *, + report: JsonObject, + report_path: str | Path, +) -> Path: + output_path = Path(report_path).expanduser().resolve() + output_path.parent.mkdir(parents=True, exist_ok=True) + output_path.write_text( + json.dumps(report, indent=2, sort_keys=True) + "\n", + encoding="utf-8", + ) + return output_path diff --git a/apps/web/components/approval-detail.test.tsx b/apps/web/components/approval-detail.test.tsx index 2a448cd..7a8fdf0 100644 --- a/apps/web/components/approval-detail.test.tsx +++ b/apps/web/components/approval-detail.test.tsx @@ -188,7 +188,7 @@ describe("ApprovalDetail", () => { }); expect(refreshMock).toHaveBeenCalledTimes(1); - expect(screen.getByText("ADD persisted at revision 1.")).toBeInTheDocument(); + expect(await screen.findByText("ADD persisted at revision 1.")).toBeInTheDocument(); }); it("shows validation feedback for invalid JSON before submitting", () => { diff --git a/apps/web/components/continuity-open-loops-panel.test.tsx b/apps/web/components/continuity-open-loops-panel.test.tsx index 8d65a69..4828733 100644 --- a/apps/web/components/continuity-open-loops-panel.test.tsx +++ b/apps/web/components/continuity-open-loops-panel.test.tsx @@ -185,8 +185,10 @@ describe("ContinuityOpenLoopsPanel", () => { ); }); - expect(refreshMock).toHaveBeenCalled(); - expect(screen.getByText(/Lifecycle is now completed/i)).toBeInTheDocument(); + await waitFor(() => { + expect(refreshMock).toHaveBeenCalled(); + }); + expect(await screen.findByText(/Lifecycle is now completed/i)).toBeInTheDocument(); }); it("renders explicit fallback when dashboard payload is absent", () => { diff --git a/apps/web/components/workflow-memory-writeback-form.tsx b/apps/web/components/workflow-memory-writeback-form.tsx index e4a6e08..1ccc6f7 100644 --- a/apps/web/components/workflow-memory-writeback-form.tsx +++ b/apps/web/components/workflow-memory-writeback-form.tsx @@ -95,10 +95,6 @@ export function WorkflowMemoryWritebackForm({ ); useEffect(() => { - if (isSubmitting) { - return; - } - setStatusTone("info"); setStatusText( defaultStatusText({ @@ -108,7 +104,7 @@ export function WorkflowMemoryWritebackForm({ hasPreview: Boolean(preview), }), ); - }, [evidenceEventIds.length, isSubmitting, liveModeReady, preview, source]); + }, [evidenceEventIds.length, liveModeReady, preview, source]); async function handleSubmit(event: FormEvent) { event.preventDefault(); diff --git a/docs/adr/ADR-005-import-provenance-and-dedupe-strategy.md b/docs/adr/ADR-005-import-provenance-and-dedupe-strategy.md new file mode 100644 index 0000000..e6aadf9 --- /dev/null +++ b/docs/adr/ADR-005-import-provenance-and-dedupe-strategy.md @@ -0,0 +1,45 @@ +# ADR-005: Import Provenance and Dedupe Strategy + +## Status + +Accepted (2026-04-08) + +## Context + +`P9-S37` broadens importer coverage from a single OpenClaw adapter to multiple production-usable import paths. Without one shared persistence strategy, importer behavior can drift on provenance fields, dedupe semantics, and replay outcomes. + +The sprint requires deterministic duplicate-memory posture and explicit provenance across every shipped importer. 
+ + ## Decision + + Adopt one shared importer persistence strategy for all shipped `P9-S37` importers: + + - all importer writes go through one shared persistence seam (`importers/common.py`) + - each importer must persist explicit `source_kind` + - each importer must persist a source-specific deterministic dedupe key in provenance (`{source_prefix}_dedupe_key`) + - each importer must persist source-specific context metadata (`{source_prefix}_workspace_id`, `{source_prefix}_source_path`, `{source_prefix}_source_item_id`, etc.) + - dedupe posture is deterministic and measured by replaying the same fixture and expecting `status=noop` with full duplicate skip counts + - importers map into the same shipped continuity capture/object model; they do not introduce source-specific retrieval semantics + + ## Consequences + + Positive: + + - importer behavior stays consistent and auditable across OpenClaw, Markdown, and ChatGPT import paths + - dedupe and provenance posture are testable with one shared expectation model + - future importer additions can reuse the same persistence contract + + Negative: + + - importer-specific provenance key names remain source-prefixed, so field vocabulary is intentionally explicit rather than fully normalized + - importer adapters still need source-specific normalization logic before shared persistence + + ## Alternatives Considered + + ### Keep per-importer persistence logic fully separate + + Rejected because it encourages dedupe/provenance drift and makes cross-importer evaluation less reliable. + + ### Normalize all importer provenance into one unprefixed schema immediately + + Rejected in `P9-S37` because it increases migration risk and coupling without improving short-term reproducibility goals. diff --git a/docs/adr/ADR-007-public-evaluation-harness-scope.md b/docs/adr/ADR-007-public-evaluation-harness-scope.md new file mode 100644 index 0000000..6d48894 --- /dev/null +++ b/docs/adr/ADR-007-public-evaluation-harness-scope.md @@ -0,0 +1,48 @@ +# ADR-007: Public Evaluation Harness Scope + + ## Status + + Accepted (2026-04-08) + + ## Context + + `P9-S37` needs reproducible evidence that importer-expanded continuity data improves useful recall/resumption outcomes and remains correction-aware. Prior retrieval evaluation fixtures exist, but importer and correction posture claims require a sprint-specific harness with fixture-backed import replay. + + ## Decision + + Define the `P9-S37` public evaluation harness scope as local, fixture-backed, and command-driven: + + - shipped command: `./scripts/run_phase9_eval.sh` + - shipped fixture inputs: OpenClaw, Markdown, and ChatGPT fixture sources in-repo + - shipped report outputs: JSON reports under `eval/reports/` and committed baseline under `eval/baselines/` + - required measured metrics: + - importer success rate + - duplicate-memory posture rate + - recall precision-at-1 on importer-scoped queries + - resumption usefulness rate (decision + next-action usefulness in scoped briefs) + - correction effectiveness rate (supersede correction changing top recall result) + + Harness scope is intentionally local-first and deterministic. It does not include hosted telemetry, external benchmark providers, or remote evaluation infrastructure.
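To make the pass/fail semantics concrete, here is a minimal sketch of the gating rule; `summary_status` is a hypothetical condensation of the status check inside `run_phase9_evaluation` in `retrieval_evaluation.py`, and the metric names match the committed summary block.

```python
PASS_THRESHOLD = 1.0  # every measured rate must be perfect on the local fixtures


def summary_status(rates: dict[str, float], threshold: float = PASS_THRESHOLD) -> str:
    # Mirrors run_phase9_evaluation: one metric below threshold fails the whole run.
    return "pass" if all(rate >= threshold for rate in rates.values()) else "fail"


status = summary_status({
    "importer_success_rate": 1.0,
    "duplicate_posture_rate": 1.0,
    "recall_precision_at_1": 1.0,
    "resumption_usefulness_rate": 1.0,
    "correction_effectiveness_rate": 1.0,
})  # -> "pass"
```

Holding the threshold at 1.0 keeps the committed baseline honest: any regression in any single metric flips the harness result to `fail`.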
+ +## Consequences + +Positive: + +- quality claims are reproducible from documented commands and repo-local fixtures +- importer and correction outcomes are measured together, not in isolated success-only checks +- launch docs in `P9-S38` can cite committed baseline evidence directly + +Negative: + +- harness results are scoped to local deterministic fixtures, not production traffic variation +- broader benchmark dimensions remain deferred beyond `P9-S37` + +## Alternatives Considered + +### Keep only retrieval fixture evaluation without importer replay + +Rejected because it would miss importer success/duplicate posture and correction-aware continuity evidence required by sprint acceptance. + +### Build hosted benchmark infrastructure in `P9-S37` + +Rejected because hosted evaluation is out of scope and would delay shipping deterministic local evidence. diff --git a/docs/phase9-sprint-33-38-plan.md b/docs/phase9-sprint-33-38-plan.md index 8d7f91f..a23c337 100644 --- a/docs/phase9-sprint-33-38-plan.md +++ b/docs/phase9-sprint-33-38-plan.md @@ -141,7 +141,7 @@ Make Alice usable immediately by external assistants through a stable small tool - broad tool surface expansion - remote hosted auth systems -## Sprint 36 (P9-S36) (current delivery) +## Sprint 36 (P9-S36) (shipped baseline) ### Title @@ -177,7 +177,7 @@ Prove Alice is agent-agnostic and materially improves an existing agent stack. - generic platform SDK - many external integrations at once -## Sprint 37 (P9-S37) +## Sprint 37 (P9-S37) (shipped baseline) ### Title @@ -197,12 +197,16 @@ Make the public product sticky fast and prove it is better, not just broader. ### Deliverables - at least three production-usable importers + - OpenClaw + - Markdown + - ChatGPT export - benchmark flows for: - recall precision - resumption usefulness - correction effectiveness - - open-loop retrieval quality -- sample eval report + - importer success and duplicate-memory posture +- local evaluation harness command: `./scripts/run_phase9_eval.sh` +- sample eval report: `eval/baselines/phase9_s37_baseline.json` ### Acceptance Criteria @@ -216,7 +220,7 @@ Make the public product sticky fast and prove it is better, not just broader. 
- launch narrative polish - broad UI work -## Sprint 38 (P9-S38) +## Sprint 38 (P9-S38) (current delivery seam) ### Title diff --git a/eval/baselines/phase9_s37_baseline.json b/eval/baselines/phase9_s37_baseline.json new file mode 100644 index 0000000..428872b --- /dev/null +++ b/eval/baselines/phase9_s37_baseline.json @@ -0,0 +1,207 @@ +{ + "correction_effectiveness": { + "after_top_id": "26ef8d40-cd74-4b6a-a40f-4e1140b50482", + "before_top_id": "7c19aeb1-74c0-432d-afef-ac116eefcc27", + "effective": true, + "replacement_id": "26ef8d40-cd74-4b6a-a40f-4e1140b50482", + "target_importer": "openclaw" + }, + "generated_at": "2026-04-08T08:22:21.423211+00:00", + "importer_runs": [ + { + "duplicate_posture_ok": true, + "first_run": { + "dedupe_posture": "workspace_and_payload_fingerprint", + "fixture_id": "openclaw-s36-workspace-v1", + "imported_capture_event_ids": [ + "1925f916-0926-45bd-ad3b-0408c368ea68", + "57037b66-34b2-47d3-8ee3-502a1241dd89", + "c3146467-ad33-4e2f-a75f-793a54ee2bd3", + "069a479b-a502-420e-bf19-7eeadb8bb6c1" + ], + "imported_count": 4, + "imported_object_ids": [ + "7c19aeb1-74c0-432d-afef-ac116eefcc27", + "563f57c4-2227-4cb8-ab6e-e9b8da8ce9aa", + "5813abf4-fe88-48cc-9fb2-56956aff7a3e", + "094013b0-697c-4ac4-a1ce-18efcc4f25d5" + ], + "provenance_source_kind": "openclaw_import", + "skipped_duplicates": 1, + "source_path": "/Users/samirusani/Desktop/Codex/AliceBot/fixtures/openclaw/workspace_v1.json", + "status": "ok", + "total_candidates": 5, + "workspace_id": "openclaw-workspace-demo-001", + "workspace_name": "OpenClaw Interop Demo" + }, + "import_success": true, + "importer": "openclaw", + "second_run": { + "dedupe_posture": "workspace_and_payload_fingerprint", + "fixture_id": "openclaw-s36-workspace-v1", + "imported_capture_event_ids": [], + "imported_count": 0, + "imported_object_ids": [], + "provenance_source_kind": "openclaw_import", + "skipped_duplicates": 5, + "source_path": "/Users/samirusani/Desktop/Codex/AliceBot/fixtures/openclaw/workspace_v1.json", + "status": "noop", + "total_candidates": 5, + "workspace_id": "openclaw-workspace-demo-001", + "workspace_name": "OpenClaw Interop Demo" + }, + "source_kind": "openclaw_import", + "source_path": "/Users/samirusani/Desktop/Codex/AliceBot/fixtures/openclaw/workspace_v1.json" + }, + { + "duplicate_posture_ok": true, + "first_run": { + "dedupe_posture": "workspace_and_line_fingerprint", + "fixture_id": "markdown-s37-workspace-v1", + "imported_capture_event_ids": [ + "7d643a3d-0ca1-476a-b505-f5e7ca2f7cda", + "716c19f5-6065-4380-9581-22a0e0e9b7f7", + "c8745899-7c27-44bf-bf5e-b0f0582cb2b1", + "e7a43dd0-dbb1-44ca-a1e5-9a07a7a2ba06" + ], + "imported_count": 4, + "imported_object_ids": [ + "dc742ed2-1ba8-4de5-84d1-ece333faab13", + "72123f2d-a870-4592-b585-04d2705722c0", + "c4a317b6-7400-4526-b554-e8a6af1c2c8f", + "fcfc9203-1d86-4e7d-85cd-5e89af03a5ad" + ], + "provenance_source_kind": "markdown_import", + "skipped_duplicates": 1, + "source_path": "/Users/samirusani/Desktop/Codex/AliceBot/fixtures/importers/markdown/workspace_v1.md", + "status": "ok", + "total_candidates": 5, + "workspace_id": "markdown-workspace-demo-001", + "workspace_name": "Markdown Import Demo" + }, + "import_success": true, + "importer": "markdown", + "second_run": { + "dedupe_posture": "workspace_and_line_fingerprint", + "fixture_id": "markdown-s37-workspace-v1", + "imported_capture_event_ids": [], + "imported_count": 0, + "imported_object_ids": [], + "provenance_source_kind": "markdown_import", + "skipped_duplicates": 5, + "source_path": 
"/Users/samirusani/Desktop/Codex/AliceBot/fixtures/importers/markdown/workspace_v1.md", + "status": "noop", + "total_candidates": 5, + "workspace_id": "markdown-workspace-demo-001", + "workspace_name": "Markdown Import Demo" + }, + "source_kind": "markdown_import", + "source_path": "/Users/samirusani/Desktop/Codex/AliceBot/fixtures/importers/markdown/workspace_v1.md" + }, + { + "duplicate_posture_ok": true, + "first_run": { + "dedupe_posture": "workspace_conversation_message_fingerprint", + "fixture_id": "chatgpt-s37-workspace-v1", + "imported_capture_event_ids": [ + "a6fc88a5-5fb0-4f21-a243-1ddb964ca07b", + "f1f9fd8e-ad40-4d5d-ac52-784200cab2e9", + "dac1accf-256f-4985-8edd-29803fa56744", + "45df5b84-765b-4d34-bc60-50210d1b2d9a" + ], + "imported_count": 4, + "imported_object_ids": [ + "114e5abb-3715-40fb-90b3-7acabd2c63f4", + "316172c4-fce1-4940-a087-7c5e54518a9d", + "98e4ed19-d664-4aad-84dc-91e7c93bb6b2", + "e9b21c9a-7f35-4138-87a1-0b3f9dc6ee73" + ], + "provenance_source_kind": "chatgpt_import", + "skipped_duplicates": 1, + "source_path": "/Users/samirusani/Desktop/Codex/AliceBot/fixtures/importers/chatgpt/workspace_v1.json", + "status": "ok", + "total_candidates": 5, + "workspace_id": "chatgpt-workspace-demo-001", + "workspace_name": "ChatGPT Import Demo" + }, + "import_success": true, + "importer": "chatgpt", + "second_run": { + "dedupe_posture": "workspace_conversation_message_fingerprint", + "fixture_id": "chatgpt-s37-workspace-v1", + "imported_capture_event_ids": [], + "imported_count": 0, + "imported_object_ids": [], + "provenance_source_kind": "chatgpt_import", + "skipped_duplicates": 5, + "source_path": "/Users/samirusani/Desktop/Codex/AliceBot/fixtures/importers/chatgpt/workspace_v1.json", + "status": "noop", + "total_candidates": 5, + "workspace_id": "chatgpt-workspace-demo-001", + "workspace_name": "ChatGPT Import Demo" + }, + "source_kind": "chatgpt_import", + "source_path": "/Users/samirusani/Desktop/Codex/AliceBot/fixtures/importers/chatgpt/workspace_v1.json" + } + ], + "recall_precision_checks": [ + { + "expected_source_kind": "openclaw_import", + "hit": true, + "importer": "openclaw", + "query": "MCP tool surface", + "returned_count": 1, + "top_source_kind": "openclaw_import" + }, + { + "expected_source_kind": "markdown_import", + "hit": true, + "importer": "markdown", + "query": "markdown importer deterministic", + "returned_count": 4, + "top_source_kind": "markdown_import" + }, + { + "expected_source_kind": "chatgpt_import", + "hit": true, + "importer": "chatgpt", + "query": "ChatGPT import provenance explicit", + "returned_count": 4, + "top_source_kind": "chatgpt_import" + } + ], + "resumption_usefulness_checks": [ + { + "expected_source_kind": "openclaw_import", + "importer": "openclaw", + "last_decision_source_kind": "openclaw_import", + "next_action_source_kind": "openclaw_import", + "useful": true + }, + { + "expected_source_kind": "markdown_import", + "importer": "markdown", + "last_decision_source_kind": "markdown_import", + "next_action_source_kind": "markdown_import", + "useful": true + }, + { + "expected_source_kind": "chatgpt_import", + "importer": "chatgpt", + "last_decision_source_kind": "chatgpt_import", + "next_action_source_kind": "chatgpt_import", + "useful": true + } + ], + "schema_version": "phase9_eval_v1", + "summary": { + "correction_effectiveness_rate": 1.0, + "duplicate_posture_rate": 1.0, + "importer_count": 3, + "importer_success_rate": 1.0, + "pass_threshold": 1.0, + "recall_precision_at_1": 1.0, + "resumption_usefulness_rate": 1.0, + "status": 
"pass" + } +} diff --git a/eval/reports/phase9_eval_latest.json b/eval/reports/phase9_eval_latest.json new file mode 100644 index 0000000..428872b --- /dev/null +++ b/eval/reports/phase9_eval_latest.json @@ -0,0 +1,207 @@ +{ + "correction_effectiveness": { + "after_top_id": "26ef8d40-cd74-4b6a-a40f-4e1140b50482", + "before_top_id": "7c19aeb1-74c0-432d-afef-ac116eefcc27", + "effective": true, + "replacement_id": "26ef8d40-cd74-4b6a-a40f-4e1140b50482", + "target_importer": "openclaw" + }, + "generated_at": "2026-04-08T08:22:21.423211+00:00", + "importer_runs": [ + { + "duplicate_posture_ok": true, + "first_run": { + "dedupe_posture": "workspace_and_payload_fingerprint", + "fixture_id": "openclaw-s36-workspace-v1", + "imported_capture_event_ids": [ + "1925f916-0926-45bd-ad3b-0408c368ea68", + "57037b66-34b2-47d3-8ee3-502a1241dd89", + "c3146467-ad33-4e2f-a75f-793a54ee2bd3", + "069a479b-a502-420e-bf19-7eeadb8bb6c1" + ], + "imported_count": 4, + "imported_object_ids": [ + "7c19aeb1-74c0-432d-afef-ac116eefcc27", + "563f57c4-2227-4cb8-ab6e-e9b8da8ce9aa", + "5813abf4-fe88-48cc-9fb2-56956aff7a3e", + "094013b0-697c-4ac4-a1ce-18efcc4f25d5" + ], + "provenance_source_kind": "openclaw_import", + "skipped_duplicates": 1, + "source_path": "/Users/samirusani/Desktop/Codex/AliceBot/fixtures/openclaw/workspace_v1.json", + "status": "ok", + "total_candidates": 5, + "workspace_id": "openclaw-workspace-demo-001", + "workspace_name": "OpenClaw Interop Demo" + }, + "import_success": true, + "importer": "openclaw", + "second_run": { + "dedupe_posture": "workspace_and_payload_fingerprint", + "fixture_id": "openclaw-s36-workspace-v1", + "imported_capture_event_ids": [], + "imported_count": 0, + "imported_object_ids": [], + "provenance_source_kind": "openclaw_import", + "skipped_duplicates": 5, + "source_path": "/Users/samirusani/Desktop/Codex/AliceBot/fixtures/openclaw/workspace_v1.json", + "status": "noop", + "total_candidates": 5, + "workspace_id": "openclaw-workspace-demo-001", + "workspace_name": "OpenClaw Interop Demo" + }, + "source_kind": "openclaw_import", + "source_path": "/Users/samirusani/Desktop/Codex/AliceBot/fixtures/openclaw/workspace_v1.json" + }, + { + "duplicate_posture_ok": true, + "first_run": { + "dedupe_posture": "workspace_and_line_fingerprint", + "fixture_id": "markdown-s37-workspace-v1", + "imported_capture_event_ids": [ + "7d643a3d-0ca1-476a-b505-f5e7ca2f7cda", + "716c19f5-6065-4380-9581-22a0e0e9b7f7", + "c8745899-7c27-44bf-bf5e-b0f0582cb2b1", + "e7a43dd0-dbb1-44ca-a1e5-9a07a7a2ba06" + ], + "imported_count": 4, + "imported_object_ids": [ + "dc742ed2-1ba8-4de5-84d1-ece333faab13", + "72123f2d-a870-4592-b585-04d2705722c0", + "c4a317b6-7400-4526-b554-e8a6af1c2c8f", + "fcfc9203-1d86-4e7d-85cd-5e89af03a5ad" + ], + "provenance_source_kind": "markdown_import", + "skipped_duplicates": 1, + "source_path": "/Users/samirusani/Desktop/Codex/AliceBot/fixtures/importers/markdown/workspace_v1.md", + "status": "ok", + "total_candidates": 5, + "workspace_id": "markdown-workspace-demo-001", + "workspace_name": "Markdown Import Demo" + }, + "import_success": true, + "importer": "markdown", + "second_run": { + "dedupe_posture": "workspace_and_line_fingerprint", + "fixture_id": "markdown-s37-workspace-v1", + "imported_capture_event_ids": [], + "imported_count": 0, + "imported_object_ids": [], + "provenance_source_kind": "markdown_import", + "skipped_duplicates": 5, + "source_path": "/Users/samirusani/Desktop/Codex/AliceBot/fixtures/importers/markdown/workspace_v1.md", + "status": "noop", + "total_candidates": 5, + 
"workspace_id": "markdown-workspace-demo-001", + "workspace_name": "Markdown Import Demo" + }, + "source_kind": "markdown_import", + "source_path": "/Users/samirusani/Desktop/Codex/AliceBot/fixtures/importers/markdown/workspace_v1.md" + }, + { + "duplicate_posture_ok": true, + "first_run": { + "dedupe_posture": "workspace_conversation_message_fingerprint", + "fixture_id": "chatgpt-s37-workspace-v1", + "imported_capture_event_ids": [ + "a6fc88a5-5fb0-4f21-a243-1ddb964ca07b", + "f1f9fd8e-ad40-4d5d-ac52-784200cab2e9", + "dac1accf-256f-4985-8edd-29803fa56744", + "45df5b84-765b-4d34-bc60-50210d1b2d9a" + ], + "imported_count": 4, + "imported_object_ids": [ + "114e5abb-3715-40fb-90b3-7acabd2c63f4", + "316172c4-fce1-4940-a087-7c5e54518a9d", + "98e4ed19-d664-4aad-84dc-91e7c93bb6b2", + "e9b21c9a-7f35-4138-87a1-0b3f9dc6ee73" + ], + "provenance_source_kind": "chatgpt_import", + "skipped_duplicates": 1, + "source_path": "/Users/samirusani/Desktop/Codex/AliceBot/fixtures/importers/chatgpt/workspace_v1.json", + "status": "ok", + "total_candidates": 5, + "workspace_id": "chatgpt-workspace-demo-001", + "workspace_name": "ChatGPT Import Demo" + }, + "import_success": true, + "importer": "chatgpt", + "second_run": { + "dedupe_posture": "workspace_conversation_message_fingerprint", + "fixture_id": "chatgpt-s37-workspace-v1", + "imported_capture_event_ids": [], + "imported_count": 0, + "imported_object_ids": [], + "provenance_source_kind": "chatgpt_import", + "skipped_duplicates": 5, + "source_path": "/Users/samirusani/Desktop/Codex/AliceBot/fixtures/importers/chatgpt/workspace_v1.json", + "status": "noop", + "total_candidates": 5, + "workspace_id": "chatgpt-workspace-demo-001", + "workspace_name": "ChatGPT Import Demo" + }, + "source_kind": "chatgpt_import", + "source_path": "/Users/samirusani/Desktop/Codex/AliceBot/fixtures/importers/chatgpt/workspace_v1.json" + } + ], + "recall_precision_checks": [ + { + "expected_source_kind": "openclaw_import", + "hit": true, + "importer": "openclaw", + "query": "MCP tool surface", + "returned_count": 1, + "top_source_kind": "openclaw_import" + }, + { + "expected_source_kind": "markdown_import", + "hit": true, + "importer": "markdown", + "query": "markdown importer deterministic", + "returned_count": 4, + "top_source_kind": "markdown_import" + }, + { + "expected_source_kind": "chatgpt_import", + "hit": true, + "importer": "chatgpt", + "query": "ChatGPT import provenance explicit", + "returned_count": 4, + "top_source_kind": "chatgpt_import" + } + ], + "resumption_usefulness_checks": [ + { + "expected_source_kind": "openclaw_import", + "importer": "openclaw", + "last_decision_source_kind": "openclaw_import", + "next_action_source_kind": "openclaw_import", + "useful": true + }, + { + "expected_source_kind": "markdown_import", + "importer": "markdown", + "last_decision_source_kind": "markdown_import", + "next_action_source_kind": "markdown_import", + "useful": true + }, + { + "expected_source_kind": "chatgpt_import", + "importer": "chatgpt", + "last_decision_source_kind": "chatgpt_import", + "next_action_source_kind": "chatgpt_import", + "useful": true + } + ], + "schema_version": "phase9_eval_v1", + "summary": { + "correction_effectiveness_rate": 1.0, + "duplicate_posture_rate": 1.0, + "importer_count": 3, + "importer_success_rate": 1.0, + "pass_threshold": 1.0, + "recall_precision_at_1": 1.0, + "resumption_usefulness_rate": 1.0, + "status": "pass" + } +} diff --git a/fixtures/importers/chatgpt/workspace_v1.json b/fixtures/importers/chatgpt/workspace_v1.json new file mode 
100644 index 0000000..149caf7 --- /dev/null +++ b/fixtures/importers/chatgpt/workspace_v1.json @@ -0,0 +1,52 @@ +{ + "fixture_id": "chatgpt-s37-workspace-v1", + "workspace": { + "id": "chatgpt-workspace-demo-001", + "name": "ChatGPT Import Demo" + }, + "conversations": [ + { + "id": "bbbbbbbb-bbbb-4bbb-8bbb-bbbbbbbbbbbb", + "title": "Phase 9 Import Plan", + "project": "ChatGPT Import Project", + "person": "Interop Owner", + "messages": [ + { + "id": "cgpt-msg-001", + "role": "assistant", + "text": "Decision: Keep ChatGPT import provenance explicit for every message.", + "status": "active", + "confidence": 0.95 + }, + { + "id": "cgpt-msg-002", + "role": "assistant", + "text": "Next Action: Run ChatGPT fixture import before phase9 evaluation.", + "status": "active", + "confidence": 0.92 + }, + { + "id": "cgpt-msg-003", + "role": "assistant", + "text": "Blocker: Need reviewer confirmation on dedupe posture.", + "status": "active", + "confidence": 0.90 + }, + { + "id": "cgpt-msg-004", + "role": "assistant", + "text": "Commitment: Capture correction outcome evidence in eval report.", + "status": "completed", + "confidence": 0.91 + }, + { + "id": "cgpt-msg-004", + "role": "assistant", + "text": "Commitment: Capture correction outcome evidence in eval report.", + "status": "completed", + "confidence": 0.91 + } + ] + } + ] +} diff --git a/fixtures/importers/markdown/workspace_v1.md b/fixtures/importers/markdown/workspace_v1.md new file mode 100644 index 0000000..00c5f62 --- /dev/null +++ b/fixtures/importers/markdown/workspace_v1.md @@ -0,0 +1,19 @@ +--- +fixture_id: markdown-s37-workspace-v1 +workspace_id: markdown-workspace-demo-001 +workspace_name: Markdown Import Demo +thread_id: eeeeeeee-eeee-4eee-8eee-eeeeeeeeeeee +task_id: ffffffff-ffff-4fff-8fff-ffffffffffff +project: Markdown Import Project +person: Markdown Interop Owner +default_status: active +default_confidence: 0.90 +--- + +# Markdown Continuity Snapshot + +- Decision: Keep markdown importer deterministic for baseline evidence. | source_event_id=markdown-event-0001 | confirmation_status=confirmed +- Next Action: Run markdown fixture import before evaluation harness. | source_event_id=markdown-event-0002 | person=Build Engineer | confirmation_status=confirmed +- Waiting For: Control Tower reviewer PASS on markdown importer verification. | source_event_id=markdown-event-0003 +- Commitment: Publish markdown importer usage docs in README. | status=completed | source_event_id=markdown-event-0004 | confirmation_status=confirmed +- Commitment: Publish markdown importer usage docs in README. 
| status=completed | source_event_id=markdown-event-0004 | confirmation_status=confirmed diff --git a/scripts/load_chatgpt_sample_data.py b/scripts/load_chatgpt_sample_data.py new file mode 100755 index 0000000..ce5ef31 --- /dev/null +++ b/scripts/load_chatgpt_sample_data.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import argparse +import json +import os +from pathlib import Path +import sys +from uuid import UUID + + +REPO_ROOT = Path(__file__).resolve().parents[1] +API_SRC = REPO_ROOT / "apps" / "api" / "src" +if str(API_SRC) not in sys.path: + sys.path.insert(0, str(API_SRC)) + +from alicebot_api.chatgpt_import import import_chatgpt_source +from alicebot_api.db import user_connection +from alicebot_api.store import ContinuityStore + + +DEFAULT_DATABASE_URL = "postgresql://alicebot_app:alicebot_app@localhost:5432/alicebot" +DEFAULT_AUTH_USER_ID = "00000000-0000-0000-0000-000000000001" +DEFAULT_SOURCE_PATH = REPO_ROOT / "fixtures" / "importers" / "chatgpt" / "workspace_v1.json" + + +def _parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Import ChatGPT sample export data into Alice continuity objects." + ) + parser.add_argument( + "--source", + default=os.getenv("CHATGPT_SAMPLE_DATA_PATH", str(DEFAULT_SOURCE_PATH)), + help="Path to a ChatGPT export JSON file or directory.", + ) + parser.add_argument( + "--database-url", + default=os.getenv("DATABASE_URL", DEFAULT_DATABASE_URL), + help="Database URL used for writes.", + ) + parser.add_argument( + "--user-id", + default=os.getenv("ALICEBOT_AUTH_USER_ID", DEFAULT_AUTH_USER_ID), + help="User ID to own imported ChatGPT data.", + ) + parser.add_argument( + "--user-email", + default=os.getenv("ALICEBOT_IMPORT_USER_EMAIL", "chatgpt-sample@example.com"), + help="Email for auto-created user when --user-id is not found.", + ) + parser.add_argument( + "--display-name", + default=os.getenv("ALICEBOT_IMPORT_USER_DISPLAY_NAME", "ChatGPT Sample User"), + help="Display name for auto-created user when --user-id is not found.", + ) + return parser.parse_args() + + +def _ensure_user(store: ContinuityStore, *, user_id: UUID, email: str, display_name: str) -> None: + with store.conn.cursor() as cur: + cur.execute("SELECT 1 FROM users WHERE id = %s", (user_id,)) + exists = cur.fetchone() is not None + if exists: + return + store.create_user(user_id, email, display_name) + + +def main() -> int: + args = _parse_args() + source_path = Path(args.source).expanduser().resolve() + user_id = UUID(str(args.user_id)) + + with user_connection(args.database_url, user_id) as conn: + store = ContinuityStore(conn) + _ensure_user( + store, + user_id=user_id, + email=str(args.user_email), + display_name=str(args.display_name), + ) + summary = import_chatgpt_source( + store, + user_id=user_id, + source=source_path, + ) + + print( + json.dumps( + { + **summary, + "user_id": str(user_id), + "source_path": str(source_path), + }, + indent=2, + sort_keys=True, + ) + ) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/load_chatgpt_sample_data.sh b/scripts/load_chatgpt_sample_data.sh new file mode 100755 index 0000000..8022dfa --- /dev/null +++ b/scripts/load_chatgpt_sample_data.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd -- "${SCRIPT_DIR}/.." && pwd)" + +if [ -f "${REPO_ROOT}/.env" ]; then + set -a + . 
"${REPO_ROOT}/.env" + set +a +fi + +PYTHON_BIN="python3" +if [ -x "${REPO_ROOT}/.venv/bin/python" ]; then + PYTHON_BIN="${REPO_ROOT}/.venv/bin/python" +fi + +cd "${REPO_ROOT}" + +exec "${PYTHON_BIN}" "${REPO_ROOT}/scripts/load_chatgpt_sample_data.py" "$@" diff --git a/scripts/load_markdown_sample_data.py b/scripts/load_markdown_sample_data.py new file mode 100755 index 0000000..3a969df --- /dev/null +++ b/scripts/load_markdown_sample_data.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import argparse +import json +import os +from pathlib import Path +import sys +from uuid import UUID + + +REPO_ROOT = Path(__file__).resolve().parents[1] +API_SRC = REPO_ROOT / "apps" / "api" / "src" +if str(API_SRC) not in sys.path: + sys.path.insert(0, str(API_SRC)) + +from alicebot_api.db import user_connection +from alicebot_api.markdown_import import import_markdown_source +from alicebot_api.store import ContinuityStore + + +DEFAULT_DATABASE_URL = "postgresql://alicebot_app:alicebot_app@localhost:5432/alicebot" +DEFAULT_AUTH_USER_ID = "00000000-0000-0000-0000-000000000001" +DEFAULT_SOURCE_PATH = REPO_ROOT / "fixtures" / "importers" / "markdown" / "workspace_v1.md" + + +def _parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Import markdown sample workspace data into Alice continuity objects." + ) + parser.add_argument( + "--source", + default=os.getenv("MARKDOWN_SAMPLE_DATA_PATH", str(DEFAULT_SOURCE_PATH)), + help="Path to a markdown file or directory.", + ) + parser.add_argument( + "--database-url", + default=os.getenv("DATABASE_URL", DEFAULT_DATABASE_URL), + help="Database URL used for writes.", + ) + parser.add_argument( + "--user-id", + default=os.getenv("ALICEBOT_AUTH_USER_ID", DEFAULT_AUTH_USER_ID), + help="User ID to own imported markdown data.", + ) + parser.add_argument( + "--user-email", + default=os.getenv("ALICEBOT_IMPORT_USER_EMAIL", "markdown-sample@example.com"), + help="Email for auto-created user when --user-id is not found.", + ) + parser.add_argument( + "--display-name", + default=os.getenv("ALICEBOT_IMPORT_USER_DISPLAY_NAME", "Markdown Sample User"), + help="Display name for auto-created user when --user-id is not found.", + ) + return parser.parse_args() + + +def _ensure_user(store: ContinuityStore, *, user_id: UUID, email: str, display_name: str) -> None: + with store.conn.cursor() as cur: + cur.execute("SELECT 1 FROM users WHERE id = %s", (user_id,)) + exists = cur.fetchone() is not None + if exists: + return + store.create_user(user_id, email, display_name) + + +def main() -> int: + args = _parse_args() + source_path = Path(args.source).expanduser().resolve() + user_id = UUID(str(args.user_id)) + + with user_connection(args.database_url, user_id) as conn: + store = ContinuityStore(conn) + _ensure_user( + store, + user_id=user_id, + email=str(args.user_email), + display_name=str(args.display_name), + ) + summary = import_markdown_source( + store, + user_id=user_id, + source=source_path, + ) + + print( + json.dumps( + { + **summary, + "user_id": str(user_id), + "source_path": str(source_path), + }, + indent=2, + sort_keys=True, + ) + ) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/load_markdown_sample_data.sh b/scripts/load_markdown_sample_data.sh new file mode 100755 index 0000000..1b5ff79 --- /dev/null +++ b/scripts/load_markdown_sample_data.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && 
pwd)" +REPO_ROOT="$(cd -- "${SCRIPT_DIR}/.." && pwd)" + +if [ -f "${REPO_ROOT}/.env" ]; then + set -a + . "${REPO_ROOT}/.env" + set +a +fi + +PYTHON_BIN="python3" +if [ -x "${REPO_ROOT}/.venv/bin/python" ]; then + PYTHON_BIN="${REPO_ROOT}/.venv/bin/python" +fi + +cd "${REPO_ROOT}" + +exec "${PYTHON_BIN}" "${REPO_ROOT}/scripts/load_markdown_sample_data.py" "$@" diff --git a/scripts/run_phase9_eval.py b/scripts/run_phase9_eval.py new file mode 100755 index 0000000..f464262 --- /dev/null +++ b/scripts/run_phase9_eval.py @@ -0,0 +1,155 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import argparse +import json +import os +from pathlib import Path +import sys +from uuid import UUID + + +REPO_ROOT = Path(__file__).resolve().parents[1] +_VENV_REEXEC_ENV = "ALICEBOT_PHASE9_EVAL_REEXEC" + + +def _maybe_reexec_into_repo_venv() -> None: + if os.getenv(_VENV_REEXEC_ENV) == "1": + return + + venv_python = (REPO_ROOT / ".venv" / "bin" / "python").resolve() + if not venv_python.exists(): + return + + current_python = Path(sys.executable).expanduser().resolve() + if current_python == venv_python: + return + + os.environ[_VENV_REEXEC_ENV] = "1" + os.execv( + str(venv_python), + [ + str(venv_python), + str(Path(__file__).resolve()), + *sys.argv[1:], + ], + ) + + +_maybe_reexec_into_repo_venv() + +API_SRC = REPO_ROOT / "apps" / "api" / "src" +if str(API_SRC) not in sys.path: + sys.path.insert(0, str(API_SRC)) + +from alicebot_api.db import user_connection +from alicebot_api.retrieval_evaluation import run_phase9_evaluation, write_phase9_evaluation_report +from alicebot_api.store import ContinuityStore + + +DEFAULT_DATABASE_URL = "postgresql://alicebot_app:alicebot_app@localhost:5432/alicebot" +DEFAULT_AUTH_USER_ID = "00000000-0000-0000-0000-000000000001" +DEFAULT_REPORT_PATH = REPO_ROOT / "eval" / "reports" / "phase9_eval_latest.json" +DEFAULT_OPENCLAW_SOURCE = REPO_ROOT / "fixtures" / "openclaw" / "workspace_v1.json" +DEFAULT_MARKDOWN_SOURCE = REPO_ROOT / "fixtures" / "importers" / "markdown" / "workspace_v1.md" +DEFAULT_CHATGPT_SOURCE = REPO_ROOT / "fixtures" / "importers" / "chatgpt" / "workspace_v1.json" + + +def _parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Run Phase 9 importer and continuity evaluation harness and write a baseline report." 
+ ) + parser.add_argument( + "--database-url", + default=os.getenv("DATABASE_URL", DEFAULT_DATABASE_URL), + help="Database URL used for writes and reads.", + ) + parser.add_argument( + "--user-id", + default=os.getenv("ALICEBOT_AUTH_USER_ID", DEFAULT_AUTH_USER_ID), + help="User ID to own the evaluation run data.", + ) + parser.add_argument( + "--user-email", + default=os.getenv("ALICEBOT_IMPORT_USER_EMAIL", "phase9-eval@example.com"), + help="Email for auto-created user when --user-id is not found.", + ) + parser.add_argument( + "--display-name", + default=os.getenv("ALICEBOT_IMPORT_USER_DISPLAY_NAME", "Phase9 Eval User"), + help="Display name for auto-created user when --user-id is not found.", + ) + parser.add_argument( + "--openclaw-source", + default=str(DEFAULT_OPENCLAW_SOURCE), + help="Path to OpenClaw fixture source.", + ) + parser.add_argument( + "--markdown-source", + default=str(DEFAULT_MARKDOWN_SOURCE), + help="Path to markdown fixture source.", + ) + parser.add_argument( + "--chatgpt-source", + default=str(DEFAULT_CHATGPT_SOURCE), + help="Path to ChatGPT fixture source.", + ) + parser.add_argument( + "--report-path", + default=str(DEFAULT_REPORT_PATH), + help="Output JSON report path.", + ) + return parser.parse_args() + + +def _ensure_user(store: ContinuityStore, *, user_id: UUID, email: str, display_name: str) -> None: + with store.conn.cursor() as cur: + cur.execute("SELECT 1 FROM users WHERE id = %s", (user_id,)) + exists = cur.fetchone() is not None + if exists: + return + store.create_user(user_id, email, display_name) + + +def main() -> int: + args = _parse_args() + user_id = UUID(str(args.user_id)) + + with user_connection(args.database_url, user_id) as conn: + store = ContinuityStore(conn) + _ensure_user( + store, + user_id=user_id, + email=str(args.user_email), + display_name=str(args.display_name), + ) + + report = run_phase9_evaluation( + store, + user_id=user_id, + openclaw_source=Path(str(args.openclaw_source)).expanduser().resolve(), + markdown_source=Path(str(args.markdown_source)).expanduser().resolve(), + chatgpt_source=Path(str(args.chatgpt_source)).expanduser().resolve(), + ) + + output_path = write_phase9_evaluation_report( + report=report, + report_path=Path(str(args.report_path)).expanduser().resolve(), + ) + + print( + json.dumps( + { + "status": report["summary"]["status"], + "report_path": str(output_path), + "summary": report["summary"], + }, + indent=2, + sort_keys=True, + ) + ) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/run_phase9_eval.sh b/scripts/run_phase9_eval.sh new file mode 100755 index 0000000..ce6d63b --- /dev/null +++ b/scripts/run_phase9_eval.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd -- "${SCRIPT_DIR}/.." && pwd)" + +if [ -f "${REPO_ROOT}/.env" ]; then + set -a + . 
"${REPO_ROOT}/.env" + set +a +fi + +PYTHON_BIN="python3" +if [ -x "${REPO_ROOT}/.venv/bin/python" ]; then + PYTHON_BIN="${REPO_ROOT}/.venv/bin/python" +fi + +cd "${REPO_ROOT}" + +exec "${PYTHON_BIN}" "${REPO_ROOT}/scripts/run_phase9_eval.py" "$@" diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/integration/test_chatgpt_import.py b/tests/integration/test_chatgpt_import.py new file mode 100644 index 0000000..5893be7 --- /dev/null +++ b/tests/integration/test_chatgpt_import.py @@ -0,0 +1,91 @@ +from __future__ import annotations + +from pathlib import Path +from uuid import UUID, uuid4 + +from alicebot_api.chatgpt_import import import_chatgpt_source +from alicebot_api.continuity_recall import query_continuity_recall +from alicebot_api.continuity_resumption import compile_continuity_resumption_brief +from alicebot_api.contracts import ContinuityRecallQueryInput, ContinuityResumptionBriefRequestInput +from alicebot_api.db import user_connection +from alicebot_api.store import ContinuityStore + + +REPO_ROOT = Path(__file__).resolve().parents[2] +CHATGPT_FIXTURE_PATH = REPO_ROOT / "fixtures" / "importers" / "chatgpt" / "workspace_v1.json" +THREAD_ID = UUID("bbbbbbbb-bbbb-4bbb-8bbb-bbbbbbbbbbbb") + + +def seed_user(database_url: str, *, email: str) -> UUID: + user_id = uuid4() + with user_connection(database_url, user_id) as conn: + ContinuityStore(conn).create_user(user_id, email, email.split("@", 1)[0].title()) + return user_id + + +def test_chatgpt_import_supports_recall_resumption_and_idempotent_dedupe(migrated_database_urls) -> None: + user_id = seed_user(migrated_database_urls["app"], email="chatgpt-import@example.com") + + with user_connection(migrated_database_urls["app"], user_id) as conn: + store = ContinuityStore(conn) + + first_import = import_chatgpt_source( + store, + user_id=user_id, + source=CHATGPT_FIXTURE_PATH, + ) + + assert first_import["status"] == "ok" + assert first_import["fixture_id"] == "chatgpt-s37-workspace-v1" + assert first_import["workspace_id"] == "chatgpt-workspace-demo-001" + assert first_import["total_candidates"] == 5 + assert first_import["imported_count"] == 4 + assert first_import["skipped_duplicates"] == 1 + assert first_import["provenance_source_kind"] == "chatgpt_import" + + recall = query_continuity_recall( + store, + user_id=user_id, + request=ContinuityRecallQueryInput( + thread_id=THREAD_ID, + project="ChatGPT Import Project", + query="ChatGPT import provenance explicit", + limit=20, + ), + ) + + assert recall["summary"]["returned_count"] == 4 + assert all(item["provenance"]["source_kind"] == "chatgpt_import" for item in recall["items"]) + assert all( + item["provenance"].get("chatgpt_workspace_id") == "chatgpt-workspace-demo-001" + for item in recall["items"] + ) + + resumption = compile_continuity_resumption_brief( + store, + user_id=user_id, + request=ContinuityResumptionBriefRequestInput( + thread_id=THREAD_ID, + project="ChatGPT Import Project", + query="ChatGPT import provenance explicit", + max_recent_changes=10, + max_open_loops=10, + ), + ) + + brief = resumption["brief"] + assert brief["last_decision"]["item"] is not None + assert brief["last_decision"]["item"]["provenance"]["source_kind"] == "chatgpt_import" + assert brief["next_action"]["item"] is not None + assert brief["next_action"]["item"]["provenance"]["source_kind"] == "chatgpt_import" + + second_import 
= import_chatgpt_source( + store, + user_id=user_id, + source=CHATGPT_FIXTURE_PATH, + ) + + assert second_import["status"] == "noop" + assert second_import["total_candidates"] == 5 + assert second_import["imported_count"] == 0 + assert second_import["skipped_duplicates"] == 5 diff --git a/tests/integration/test_markdown_import.py b/tests/integration/test_markdown_import.py new file mode 100644 index 0000000..615803a --- /dev/null +++ b/tests/integration/test_markdown_import.py @@ -0,0 +1,91 @@ +from __future__ import annotations + +from pathlib import Path +from uuid import UUID, uuid4 + +from alicebot_api.continuity_recall import query_continuity_recall +from alicebot_api.continuity_resumption import compile_continuity_resumption_brief +from alicebot_api.contracts import ContinuityRecallQueryInput, ContinuityResumptionBriefRequestInput +from alicebot_api.db import user_connection +from alicebot_api.markdown_import import import_markdown_source +from alicebot_api.store import ContinuityStore + + +REPO_ROOT = Path(__file__).resolve().parents[2] +MARKDOWN_FIXTURE_PATH = REPO_ROOT / "fixtures" / "importers" / "markdown" / "workspace_v1.md" +THREAD_ID = UUID("eeeeeeee-eeee-4eee-8eee-eeeeeeeeeeee") + + +def seed_user(database_url: str, *, email: str) -> UUID: + user_id = uuid4() + with user_connection(database_url, user_id) as conn: + ContinuityStore(conn).create_user(user_id, email, email.split("@", 1)[0].title()) + return user_id + + +def test_markdown_import_supports_recall_resumption_and_idempotent_dedupe(migrated_database_urls) -> None: + user_id = seed_user(migrated_database_urls["app"], email="markdown-import@example.com") + + with user_connection(migrated_database_urls["app"], user_id) as conn: + store = ContinuityStore(conn) + + first_import = import_markdown_source( + store, + user_id=user_id, + source=MARKDOWN_FIXTURE_PATH, + ) + + assert first_import["status"] == "ok" + assert first_import["fixture_id"] == "markdown-s37-workspace-v1" + assert first_import["workspace_id"] == "markdown-workspace-demo-001" + assert first_import["total_candidates"] == 5 + assert first_import["imported_count"] == 4 + assert first_import["skipped_duplicates"] == 1 + assert first_import["provenance_source_kind"] == "markdown_import" + + recall = query_continuity_recall( + store, + user_id=user_id, + request=ContinuityRecallQueryInput( + thread_id=THREAD_ID, + project="Markdown Import Project", + query="markdown importer deterministic", + limit=20, + ), + ) + + assert recall["summary"]["returned_count"] == 4 + assert all(item["provenance"]["source_kind"] == "markdown_import" for item in recall["items"]) + assert all( + item["provenance"].get("markdown_workspace_id") == "markdown-workspace-demo-001" + for item in recall["items"] + ) + + resumption = compile_continuity_resumption_brief( + store, + user_id=user_id, + request=ContinuityResumptionBriefRequestInput( + thread_id=THREAD_ID, + project="Markdown Import Project", + query="markdown importer deterministic", + max_recent_changes=10, + max_open_loops=10, + ), + ) + + brief = resumption["brief"] + assert brief["last_decision"]["item"] is not None + assert brief["last_decision"]["item"]["provenance"]["source_kind"] == "markdown_import" + assert brief["next_action"]["item"] is not None + assert brief["next_action"]["item"]["provenance"]["source_kind"] == "markdown_import" + + second_import = import_markdown_source( + store, + user_id=user_id, + source=MARKDOWN_FIXTURE_PATH, + ) + + assert second_import["status"] == "noop" + assert 
second_import["total_candidates"] == 5 + assert second_import["imported_count"] == 0 + assert second_import["skipped_duplicates"] == 5 diff --git a/tests/integration/test_phase9_eval.py b/tests/integration/test_phase9_eval.py new file mode 100644 index 0000000..621201c --- /dev/null +++ b/tests/integration/test_phase9_eval.py @@ -0,0 +1,59 @@ +from __future__ import annotations + +import json +from pathlib import Path +import subprocess +import sys +from uuid import UUID, uuid4 + +from alicebot_api.db import user_connection +from alicebot_api.store import ContinuityStore + + +REPO_ROOT = Path(__file__).resolve().parents[2] +EVAL_SCRIPT = REPO_ROOT / "scripts" / "run_phase9_eval.py" + + +def seed_user(database_url: str, *, email: str) -> UUID: + user_id = uuid4() + with user_connection(database_url, user_id) as conn: + ContinuityStore(conn).create_user(user_id, email, email.split("@", 1)[0].title()) + return user_id + + +def test_phase9_eval_script_generates_report_with_expected_metrics(migrated_database_urls, tmp_path: Path) -> None: + user_id = seed_user(migrated_database_urls["app"], email="phase9-eval@example.com") + report_path = tmp_path / "phase9_eval_report.json" + + completed = subprocess.run( + [ + sys.executable, + str(EVAL_SCRIPT), + "--database-url", + migrated_database_urls["app"], + "--user-id", + str(user_id), + "--report-path", + str(report_path), + ], + cwd=REPO_ROOT, + capture_output=True, + text=True, + check=True, + ) + + stdout_payload = json.loads(completed.stdout) + assert stdout_payload["status"] == "pass" + + report = json.loads(report_path.read_text(encoding="utf-8")) + summary = report["summary"] + + assert report["schema_version"] == "phase9_eval_v1" + assert summary["status"] == "pass" + assert summary["importer_count"] == 3 + assert summary["importer_success_rate"] == 1.0 + assert summary["duplicate_posture_rate"] == 1.0 + assert summary["recall_precision_at_1"] == 1.0 + assert summary["resumption_usefulness_rate"] == 1.0 + assert summary["correction_effectiveness_rate"] == 1.0 + assert len(report["importer_runs"]) == 3 diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/unit/test_importers.py b/tests/unit/test_importers.py new file mode 100644 index 0000000..2d91f39 --- /dev/null +++ b/tests/unit/test_importers.py @@ -0,0 +1,66 @@ +from __future__ import annotations + +import json +from pathlib import Path + +import pytest + +from alicebot_api.chatgpt_import import ChatGPTImportValidationError, load_chatgpt_payload +from alicebot_api.markdown_import import MarkdownImportValidationError, load_markdown_payload + + +REPO_ROOT = Path(__file__).resolve().parents[2] +MARKDOWN_FIXTURE = REPO_ROOT / "fixtures" / "importers" / "markdown" / "workspace_v1.md" +CHATGPT_FIXTURE = REPO_ROOT / "fixtures" / "importers" / "chatgpt" / "workspace_v1.json" + + +def test_markdown_adapter_loads_fixture_with_deterministic_mapping() -> None: + first = load_markdown_payload(MARKDOWN_FIXTURE) + second = load_markdown_payload(MARKDOWN_FIXTURE) + + assert first.context.fixture_id == "markdown-s37-workspace-v1" + assert first.context.workspace_id == "markdown-workspace-demo-001" + assert first.context.workspace_name == "Markdown Import Demo" + assert len(first.items) == 5 + + first_item = first.items[0] + assert first_item.object_type == "Decision" + assert first_item.status == "active" + assert first_item.body["decision_text"] == "Keep markdown importer deterministic for baseline evidence." 
+ assert first_item.source_provenance["project"] == "Markdown Import Project" + + assert [item.dedupe_key for item in first.items] == [item.dedupe_key for item in second.items] + + +def test_markdown_adapter_rejects_unclosed_frontmatter(tmp_path: Path) -> None: + source = tmp_path / "broken.md" + source.write_text("---\nfixture_id: x\n- Decision: broken\n", encoding="utf-8") + + with pytest.raises(MarkdownImportValidationError, match="frontmatter"): + load_markdown_payload(source) + + +def test_chatgpt_adapter_loads_fixture_with_deterministic_mapping() -> None: + first = load_chatgpt_payload(CHATGPT_FIXTURE) + second = load_chatgpt_payload(CHATGPT_FIXTURE) + + assert first.context.fixture_id == "chatgpt-s37-workspace-v1" + assert first.context.workspace_id == "chatgpt-workspace-demo-001" + assert first.context.workspace_name == "ChatGPT Import Demo" + assert len(first.items) == 5 + + first_item = first.items[0] + assert first_item.object_type == "Decision" + assert first_item.status == "active" + assert first_item.body["decision_text"] == "Keep ChatGPT import provenance explicit for every message." + assert first_item.source_provenance["project"] == "ChatGPT Import Project" + + assert [item.dedupe_key for item in first.items] == [item.dedupe_key for item in second.items] + + +def test_chatgpt_adapter_rejects_invalid_payload(tmp_path: Path) -> None: + source = tmp_path / "invalid.json" + source.write_text(json.dumps({"workspace": {"id": "x"}}), encoding="utf-8") + + with pytest.raises(ChatGPTImportValidationError, match="must include one of"): + load_chatgpt_payload(source) diff --git a/tests/unit/test_phase9_eval.py b/tests/unit/test_phase9_eval.py new file mode 100644 index 0000000..67adfc4 --- /dev/null +++ b/tests/unit/test_phase9_eval.py @@ -0,0 +1,33 @@ +from __future__ import annotations + +import json +from pathlib import Path + +from alicebot_api.retrieval_evaluation import calculate_phase9_metric_ratio, write_phase9_evaluation_report + + +def test_phase9_ratio_handles_zero_total() -> None: + assert calculate_phase9_metric_ratio(passed_count=0, total_count=0) == 0.0 + + +def test_phase9_ratio_calculates_fraction() -> None: + assert calculate_phase9_metric_ratio(passed_count=2, total_count=4) == 0.5 + + +def test_phase9_report_writer_persists_json(tmp_path: Path) -> None: + report = { + "schema_version": "phase9_eval_v1", + "summary": { + "status": "pass", + "importer_count": 3, + }, + } + + output_path = write_phase9_evaluation_report( + report=report, + report_path=tmp_path / "phase9_eval.json", + ) + + assert output_path.exists() + saved = json.loads(output_path.read_text(encoding="utf-8")) + assert saved == report
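
A note on the fingerprint dedupe postures asserted above: the shared helper lives in `apps/api/src/alicebot_api/importers/common.py`, whose body is not reproduced in this excerpt, so the following is a minimal illustrative sketch rather than the shipped implementation. Assuming the key is a stable hash over the workspace id and normalized item content (the function name, key format, and normalization step are assumptions), the idempotent second-run numbers in the baseline fall out directly:

```python
import hashlib


def fingerprint_dedupe_key(source_kind: str, workspace_id: str, content: str) -> str:
    """Hypothetical fingerprint-style dedupe key (not the shipped helper).

    Stable for identical (workspace, content) pairs across runs, which is the
    property that lets a second import report imported_count == 0 and
    skipped_duplicates == total_candidates, as the baseline JSON records.
    """
    normalized = " ".join(content.split())  # cosmetic whitespace changes should not mint new keys
    digest = hashlib.sha256(f"{workspace_id}\n{normalized}".encode("utf-8")).hexdigest()
    return f"{source_kind}:{workspace_id}:{digest}"


# The duplicated cgpt-msg-004 fixture message maps to the same key twice, so
# the first import counts exactly one skipped duplicate.
text = "Commitment: Capture correction outcome evidence in eval report."
key_a = fingerprint_dedupe_key("chatgpt_import", "chatgpt-workspace-demo-001", text)
key_b = fingerprint_dedupe_key("chatgpt_import", "chatgpt-workspace-demo-001", text)
assert key_a == key_b
```

Any stable, content-addressed key gives the same posture; the important property is that re-parsing identical fixtures yields identical keys, which `tests/unit/test_importers.py` asserts through the `dedupe_key` equality checks.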