diff --git a/.ai/active/SPRINT_PACKET.md b/.ai/active/SPRINT_PACKET.md index b7bcd90..87b7769 100644 --- a/.ai/active/SPRINT_PACKET.md +++ b/.ai/active/SPRINT_PACKET.md @@ -2,7 +2,7 @@ ## Sprint Title -Sprint 5H: Semantic Artifact Chunk Retrieval Primitive +Sprint 5I: Compile-Path Semantic Artifact Retrieval Adoption ## Sprint Type @@ -10,15 +10,15 @@ feature ## Sprint Reason -Milestone 5 now has deterministic artifact chunk ingestion, lexical retrieval, compile-path lexical artifact inclusion, and durable artifact-chunk embedding storage. The next safe step is a direct semantic retrieval primitive over those stored chunk embeddings, while still deferring compile-path semantic use, hybrid artifact retrieval, connectors, and UI. +Milestone 5 now has deterministic lexical artifact retrieval in the compile path and a separate direct semantic artifact retrieval primitive over durable chunk embeddings. The next safe step is to adopt semantic artifact retrieval into the compiler as a separate context section, while still deferring hybrid lexical-plus-semantic artifact merging, reranking, connectors, and UI. ## Sprint Intent -Add the first read-side semantic retrieval primitive over stored `task_artifact_chunk_embeddings`, with explicit embedding-config selection and deterministic result ordering, without yet wiring semantic artifact retrieval into the compile path or combining it with lexical artifact retrieval. +Extend the existing context-compile path so it can optionally retrieve semantic artifact chunks using the shipped `task_artifact_chunk_embeddings` retrieval primitive, while keeping semantic artifact results separate from lexical artifact retrieval and deferring hybrid artifact fusion. ## Git Instructions -- Branch Name: `codex/sprint-5h-semantic-artifact-chunk-retrieval` +- Branch Name: `codex/sprint-5i-compile-semantic-artifact-retrieval` - Base Branch: `main` - PR Strategy: one sprint branch, one PR, no stacked PRs unless Control Tower explicitly opens a follow-up sprint - Merge Policy: squash merge only after reviewer `PASS` and explicit Control Tower merge approval @@ -28,44 +28,45 @@ Add the first read-side semantic retrieval primitive over stored `task_artifact_ - Sprint 5A shipped deterministic rooted task-workspace provisioning. - Sprint 5C shipped explicit task-artifact registration. - Sprint 5D shipped deterministic local artifact ingestion into durable chunk rows. -- Sprint 5E shipped deterministic lexical retrieval over those chunk rows. +- Sprint 5E shipped deterministic lexical artifact-chunk retrieval. - Sprint 5F shipped compile-path lexical artifact chunk inclusion. -- Sprint 5G shipped durable artifact-chunk embedding persistence tied to existing embedding configs. -- The next narrow Milestone 5 seam is semantic artifact retrieval over those stored vectors only, so later compile adoption and hybrid artifact retrieval can build on an explicit retrieval primitive instead of hidden assumptions. +- Sprint 5G shipped durable artifact-chunk embedding persistence. +- Sprint 5H shipped direct semantic artifact retrieval over those durable chunk embeddings. +- The next narrow Milestone 5 seam is compile-path adoption of semantic artifact retrieval only, so later hybrid artifact retrieval can build on an explicit compile-time semantic section instead of collapsing lexical and semantic behavior in one sprint. ## In Scope - Define typed contracts for: - - semantic artifact retrieval requests - - semantic artifact retrieval result items - - retrieval summary metadata -- Implement a narrow semantic retrieval seam that: - - accepts an explicit `embedding_config_id` - - accepts a caller-supplied query vector - - searches only durable `task_artifact_chunk_embeddings` - - joins to visible `task_artifact_chunks` and visible `task_artifacts` - - scopes retrieval by the current user plus one explicit task or one explicit artifact - - validates query-vector dimension against the chosen embedding config - - computes similarity using the stored vectors already persisted in the repo - - returns deterministic ordered chunk results with explicit score metadata - - excludes artifacts that are not yet ingested -- Implement the minimal API or service paths needed for: - - semantic retrieval for one task - - semantic retrieval for one artifact when the caller wants a narrower scope + - optional semantic artifact retrieval input on compile requests + - semantic artifact chunk result items inside the compiled context pack + - semantic artifact retrieval summary metadata inside compile responses + - semantic artifact retrieval trace payloads +- Extend the compile path so it can: + - accept an explicit semantic artifact retrieval request scoped to one visible task or one visible artifact + - accept an explicit `embedding_config_id` and caller-supplied query vector + - reuse the existing semantic artifact retrieval primitive during compile + - include semantic artifact chunks in a separate context-pack section + - record semantic artifact include/exclude decisions in `trace_events` + - preserve deterministic output for the same stored data and inputs +- Ensure compile behavior: + - leaves current continuity, memory, entity, lexical artifact, and other context sections intact + - does not merge lexical and semantic artifact sections + - excludes non-ingested artifacts + - scopes strictly by user ownership + - uses deterministic ordering and explicit per-section limits - Add unit and integration tests for: - - dimension validation - - deterministic retrieval ordering and tie-breaking - - scoped retrieval by task and by artifact - - empty-result behavior + - compile request validation for semantic artifact retrieval input + - deterministic semantic artifact section ordering - exclusion of non-ingested artifacts - - per-user isolation - - stable response shape + - trace logging for included and excluded semantic artifact chunks + - per-user isolation through the compile path + - response-shape stability for the new semantic artifact section ## Out of Scope -- No compile-path semantic artifact retrieval yet. - No hybrid lexical plus semantic artifact retrieval. -- No reranking layer beyond direct similarity ordering. +- No reranking across semantic artifact chunks. +- No lexical-plus-semantic deduplication or fusion. - No model or external API calls to generate query embeddings. - No richer document parsing beyond the already-shipped local text ingestion seam. - No Gmail or Calendar connector scope. @@ -74,59 +75,61 @@ Add the first read-side semantic retrieval primitive over stored `task_artifact_ ## Required Deliverables -- Stable semantic artifact retrieval request and response contracts. -- Minimal deterministic semantic retrieval path over existing `task_artifact_chunk_embeddings`. -- Unit and integration coverage for ordering, validation, scoping, exclusion rules, and isolation. +- Stable compile-request and compile-response contract updates for semantic artifact retrieval input and output. +- Compile-path integration with the existing semantic artifact retrieval primitive. +- Trace coverage for semantic artifact retrieval decisions inside compile runs. +- Unit and integration coverage for compile-path semantic artifact behavior, ordering, exclusion rules, validation, and isolation. - Updated `BUILD_REPORT.md` with exact verification results and explicit deferred scope. ## Acceptance Criteria -- A client can submit a query vector plus `embedding_config_id` and retrieve relevant visible artifact chunks for one task. -- A client can submit a query vector plus `embedding_config_id` and retrieve relevant visible artifact chunks for one artifact. -- Retrieval uses only durable `task_artifact_chunk_embeddings`, `task_artifact_chunks`, and artifact records already persisted in the repo. -- Retrieval rejects missing configs, dimension mismatches, and cross-user access deterministically. -- Non-ingested artifacts are excluded from semantic retrieval results. -- Result ordering is deterministic and documented. +- `POST /v0/context/compile` can optionally accept semantic artifact retrieval input and return a separate semantic artifact chunk section in the context pack. +- Compile-path semantic artifact retrieval uses only durable `task_artifact_chunk_embeddings`, `task_artifact_chunks`, and artifact records already persisted in the repo. +- Compile-path semantic artifact retrieval rejects missing configs, dimension mismatches, and cross-user access deterministically. +- Non-ingested artifacts are excluded from semantic artifact compile results. +- Semantic artifact include/exclude decisions are persisted in `trace_events`. +- Result ordering is deterministic within the semantic artifact section. - `./.venv/bin/python -m pytest tests/unit` passes. - `./.venv/bin/python -m pytest tests/integration` passes. -- No compile integration changes, hybrid retrieval, connector, runner, UI, or broader side-effect scope enters the sprint. +- No hybrid retrieval, reranking, connector, runner, UI, or broader side-effect scope enters the sprint. ## Implementation Constraints -- Keep semantic retrieval narrow and boring. -- Reuse existing embedding configs and durable artifact chunk embeddings; do not introduce a second embedding store. -- Use explicit caller-selected config and query vector input; do not auto-pick configs. -- Keep scope explicit: one task or one artifact per request. -- Do not merge semantic artifact retrieval into the main compiler in the same sprint. +- Keep compile-path adoption narrow and boring. +- Reuse the existing semantic artifact retrieval primitive; do not read raw files during compile. +- Keep semantic artifact chunks in a separate response section from lexical artifact chunks and from memory/entity context. +- Require explicit semantic artifact input; do not auto-enable semantic retrieval. +- Do not merge semantic and lexical artifact retrieval in the same sprint. ## Suggested Work Breakdown -1. Define semantic artifact retrieval request and response contracts. -2. Implement deterministic similarity search over existing artifact chunk embeddings. -3. Add explicit task-scoped and artifact-scoped semantic retrieval paths. -4. Enforce config validation, non-ingested exclusion, and current-user isolation. +1. Define compile contract updates for optional semantic artifact retrieval input and output. +2. Integrate the existing semantic artifact retrieval primitive into the compile path. +3. Add semantic artifact result summaries and trace-event payloads. +4. Preserve current context sections while adding a separate semantic artifact section. 5. Add unit and integration tests. 6. Update `BUILD_REPORT.md` with executed verification. ## Build Report Requirements `BUILD_REPORT.md` must include: -- the exact semantic artifact retrieval contracts introduced -- the similarity metric and ordering rule used +- the exact compile contract changes introduced +- the semantic artifact similarity metric and ordering rule used - exact commands run - unit and integration test results -- one example task-scoped semantic retrieval response -- one example artifact-scoped semantic retrieval response +- one example compile request and response showing the semantic artifact section +- one example of semantic artifact retrieval trace events inside one compile run - what remains intentionally deferred to later milestones ## Review Focus `REVIEW_REPORT.md` should verify: -- the sprint stayed limited to the semantic artifact chunk retrieval primitive -- retrieval is explicit-config, durable-source-only, and validation-backed -- ordering, exclusion rules, and isolation are test-backed -- no hidden compile integration changes, hybrid retrieval, connector, runner, UI, or broader side-effect scope entered the sprint +- the sprint stayed limited to compile-path semantic artifact retrieval adoption +- semantic artifact retrieval is explicit-input, durable-source-only, and validation-backed +- lexical and semantic artifact results remain separate rather than merged +- ordering, exclusion rules, trace visibility, and isolation are test-backed +- no hidden hybrid retrieval, reranking, connector, runner, UI, or broader side-effect scope entered the sprint ## Exit Condition -This sprint is complete when the repo can retrieve relevant ingested artifact chunks through a deterministic semantic read path scoped to one task or one artifact, verify the full path with Postgres-backed tests, and still defer compile-path semantic use, hybrid artifact retrieval, connectors, and UI. +This sprint is complete when the repo can optionally include semantic artifact chunks inside `POST /v0/context/compile`, trace those semantic inclusion decisions, and verify the full path with Postgres-backed tests, while still deferring hybrid artifact retrieval, reranking, connectors, and UI. diff --git a/BUILD_REPORT.md b/BUILD_REPORT.md index afabc87..d3bde52 100644 --- a/BUILD_REPORT.md +++ b/BUILD_REPORT.md @@ -2,212 +2,255 @@ ## sprint objective -Implement Sprint 5H: Semantic Artifact Chunk Retrieval Primitive by adding a deterministic, explicit-config semantic retrieval path over durable `task_artifact_chunk_embeddings`, scoped to one task or one artifact, without changing compile behavior or introducing hybrid retrieval, connectors, runners, or UI work. +Implement Sprint 5I: adopt semantic artifact retrieval into `POST /v0/context/compile` as an explicit, separate compile-time section backed only by durable `task_artifact_chunk_embeddings`, `task_artifact_chunks`, and `task_artifacts`, while keeping lexical and semantic artifact retrieval separate. ## completed work -- Added semantic artifact retrieval contracts: - - `TaskScopedSemanticArtifactChunkRetrievalInput` - - `task_id` - - `embedding_config_id` - - `query_vector` - - `limit` - - `ArtifactScopedSemanticArtifactChunkRetrievalInput` - - `task_artifact_id` - - `embedding_config_id` - - `query_vector` - - `limit` - - `TaskArtifactChunkSemanticRetrievalItem` - - `id` - - `task_id` - - `task_artifact_id` - - `relative_path` - - `media_type` - - `sequence_no` - - `char_start` - - `char_end_exclusive` - - `text` - - `score` - - `TaskArtifactChunkSemanticRetrievalSummary` - - `embedding_config_id` - - `query_vector_dimensions` - - `limit` - - `returned_count` - - `searched_artifact_count` - - `similarity_metric` - - `order` - - `scope` - - `TaskArtifactChunkSemanticRetrievalResponse` - - `TASK_ARTIFACT_CHUNK_SEMANTIC_RETRIEVAL_ORDER = ["score_desc", "relative_path_asc", "sequence_no_asc", "id_asc"]` -- Implemented semantic artifact retrieval validation and service logic: - - validates that `embedding_config_id` resolves to a visible embedding config - - validates that every query-vector element is finite numeric input - - validates `len(query_vector) == embedding_config.dimensions` - - requires one explicit scope: - - task-scoped retrieval via visible `task_id` - - artifact-scoped retrieval via visible `task_artifact_id` - - excludes artifacts whose `ingestion_status` is not `ingested` - - preserves user isolation through the existing visible-row store lookups -- Added deterministic store queries over durable artifact embedding rows only: - - task scope joins: - - `task_artifact_chunk_embeddings` - - `task_artifact_chunks` - - `task_artifacts` - - artifact scope joins the same durable tables with a narrower artifact filter - - no compile-path semantic use was added - - no second embedding store was introduced -- Added minimal API surface: - - `POST /v0/tasks/{task_id}/artifact-chunks/semantic-retrieval` - - `POST /v0/task-artifacts/{task_artifact_id}/chunks/semantic-retrieval` -- Added tests for: - - dimension validation - - deterministic ordering and tie-breaking - - task-scoped retrieval - - artifact-scoped retrieval - - empty-result behavior +- Added compile-request contracts for `semantic_artifact_retrieval` with explicit task-scoped and artifact-scoped variants: + - `kind` + - `task_id` or `task_artifact_id` + - `embedding_config_id` + - `query_vector` + - `limit` +- Added compile-response contracts for: + - `context_pack.semantic_artifact_chunks` + - `context_pack.semantic_artifact_chunk_summary` +- Added semantic artifact trace contracts for per-item include/exclude decisions with: + - scope + - artifact identity + - ingestion status + - embedding config id + - query vector dimensions + - limit + - similarity metric + - score and chunk coordinates when applicable +- Integrated semantic artifact retrieval into the compiler as an explicit optional path. +- Reused the shipped semantic artifact retrieval primitive for compile-path section assembly, then evaluated the full deterministic candidate set for compile-only include/exclude tracing and counts. +- Preserved existing compile sections and behavior for: + - continuity scope + - hybrid memory + - lexical artifact retrieval + - entities + - entity edges +- Kept lexical artifact chunks and semantic artifact chunks in separate response sections. +- Added trace coverage for: + - `within_semantic_artifact_chunk_limit` + - `semantic_artifact_chunk_limit_exceeded` + - `semantic_artifact_not_ingested` +- Added summary trace fields for semantic artifact retrieval request state, scope, candidate count, included count, limit exclusions, and non-ingested exclusions. +- Updated prompt-assembly context serialization so compiled context packs include the new semantic artifact section shape. +- Added unit and integration coverage for: + - request-shape validation + - config existence validation + - query-vector dimension validation + - deterministic ordering - exclusion of non-ingested artifacts + - trace logging for included and excluded semantic artifact results - per-user isolation - - stable response shape + - response-shape stability + +## exact compile contract changes introduced + +- Request: + - `CompileContextRequest.semantic_artifact_retrieval` + - `CompileContextTaskScopedSemanticArtifactRetrievalRequest` + - `CompileContextArtifactScopedSemanticArtifactRetrievalRequest` + - `CompileContextTaskScopedSemanticArtifactRetrievalInput` + - `CompileContextArtifactScopedSemanticArtifactRetrievalInput` +- Response: + - `CompiledContextPack.semantic_artifact_chunks` + - `CompiledContextPack.semantic_artifact_chunk_summary` + - `ContextPackSemanticArtifactChunk` + - `ContextPackSemanticArtifactChunkSummary` +- Trace payloads: + - `SemanticArtifactRetrievalDecisionTracePayload` +- Summary event additions: + - `semantic_artifact_retrieval_requested` + - `semantic_artifact_retrieval_scope_kind` + - `semantic_artifact_chunk_candidate_count` + - `included_semantic_artifact_chunk_count` + - `excluded_semantic_artifact_chunk_limit_count` + - `excluded_semantic_uningested_artifact_count` ## similarity metric and ordering rule used -- Similarity metric: - - `cosine_similarity` - - computed in SQL as `1 - (embeddings.vector <=> query_vector)` via pgvector cosine distance -- Ordering rule: - - `score DESC` - - `relative_path ASC` - - `sequence_no ASC` - - `id ASC` -- Durable source restriction: - - retrieval reads only from persisted `task_artifact_chunk_embeddings`, `task_artifact_chunks`, and `task_artifacts` +- Similarity metric: `cosine_similarity` +- Ordering rule: `score_desc`, `relative_path_asc`, `sequence_no_asc`, `id_asc` +- Compile candidate evaluation stays deterministic by using the durable semantic retrieval ordering and then applying explicit compile-time slicing by `limit`. ## incomplete work -- None within Sprint 5H scope. +- None within Sprint 5I scope. ## files changed +- `apps/api/src/alicebot_api/compiler.py` - `apps/api/src/alicebot_api/contracts.py` - `apps/api/src/alicebot_api/main.py` +- `apps/api/src/alicebot_api/response_generation.py` - `apps/api/src/alicebot_api/semantic_retrieval.py` -- `apps/api/src/alicebot_api/store.py` -- `tests/integration/test_semantic_artifact_chunk_retrieval_api.py` -- `tests/unit/test_artifacts_main.py` +- `tests/integration/test_context_compile.py` +- `tests/unit/test_compiler.py` - `tests/unit/test_main.py` -- `tests/unit/test_semantic_retrieval.py` -- `tests/unit/test_task_artifact_chunk_embedding_store.py` +- `tests/unit/test_response_generation.py` - `BUILD_REPORT.md` ## tests run -- `./.venv/bin/python -m pytest tests/unit/test_semantic_retrieval.py tests/unit/test_task_artifact_chunk_embedding_store.py tests/unit/test_artifacts_main.py tests/unit/test_main.py` - - result: `65 passed in 0.55s` -- `./.venv/bin/python -m pytest tests/integration/test_semantic_artifact_chunk_retrieval_api.py` - - first sandboxed attempt failed because local Postgres access to `localhost:5432` was blocked by the sandbox -- `./.venv/bin/python -m pytest tests/integration/test_semantic_artifact_chunk_retrieval_api.py` - - result after allowing local Postgres access: `3 passed in 1.23s` +- `./.venv/bin/python -m pytest tests/unit/test_compiler.py tests/unit/test_main.py tests/unit/test_response_generation.py` + - result: `50 passed in 0.48s` +- `./.venv/bin/python -m pytest tests/integration/test_context_compile.py` + - sandboxed attempt failed because localhost Postgres access was blocked + - rerun with local DB access allowed: `11 passed in 3.85s` - `./.venv/bin/python -m pytest tests/unit` - - result: `377 passed in 0.59s` + - result: `380 passed in 0.61s` - `./.venv/bin/python -m pytest tests/integration` - - result: `114 passed in 34.94s` + - result: `117 passed in 36.46s` + +## unit and integration test results -## example task-scoped semantic retrieval response +- Unit suite status: pass +- Integration suite status: pass +- Acceptance-criteria verification: + - compile request validation: covered and passing + - deterministic semantic artifact ordering: covered and passing + - exclusion of non-ingested artifacts: covered and passing + - include/exclude trace logging: covered and passing + - per-user isolation: covered and passing + - response-shape stability: covered and passing + +## example compile request ```json { - "items": [ - { - "id": "11111111-1111-1111-1111-111111111111", - "task_id": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa", - "task_artifact_id": "22222222-2222-2222-2222-222222222222", - "relative_path": "docs/a.txt", - "media_type": "text/plain", - "sequence_no": 1, - "char_start": 0, - "char_end_exclusive": 9, - "text": "alpha doc", - "score": 1.0 - }, - { - "id": "33333333-3333-3333-3333-333333333333", - "task_id": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa", - "task_artifact_id": "44444444-4444-4444-4444-444444444444", - "relative_path": "notes/b.md", - "media_type": "text/markdown", - "sequence_no": 1, - "char_start": 0, - "char_end_exclusive": 10, - "text": "alpha note", - "score": 1.0 - } - ], - "summary": { - "embedding_config_id": "bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb", - "query_vector_dimensions": 3, - "limit": 10, - "returned_count": 2, - "searched_artifact_count": 3, - "similarity_metric": "cosine_similarity", - "order": ["score_desc", "relative_path_asc", "sequence_no_asc", "id_asc"], - "scope": { - "kind": "task", - "task_id": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa" - } + "user_id": "11111111-1111-1111-8111-111111111111", + "thread_id": "22222222-2222-2222-8222-222222222222", + "semantic_artifact_retrieval": { + "kind": "task", + "task_id": "33333333-3333-3333-8333-333333333333", + "embedding_config_id": "44444444-4444-4444-8444-444444444444", + "query_vector": [1.0, 0.0, 0.0], + "limit": 2 } } ``` -## example artifact-scoped semantic retrieval response +## example compile response showing semantic artifact section ```json { - "items": [ - { - "id": "33333333-3333-3333-3333-333333333333", - "task_id": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa", - "task_artifact_id": "44444444-4444-4444-4444-444444444444", - "relative_path": "notes/b.md", - "media_type": "text/markdown", + "context_pack": { + "semantic_artifact_chunks": [ + { + "id": "55555555-5555-5555-8555-555555555555", + "task_id": "33333333-3333-3333-8333-333333333333", + "task_artifact_id": "66666666-6666-6666-8666-666666666666", + "relative_path": "docs/a.txt", + "media_type": "text/plain", + "sequence_no": 1, + "char_start": 0, + "char_end_exclusive": 14, + "text": "beta alpha doc", + "score": 1.0 + }, + { + "id": "77777777-7777-7777-8777-777777777777", + "task_id": "33333333-3333-3333-8333-333333333333", + "task_artifact_id": "88888888-8888-8888-8888-888888888888", + "relative_path": "notes/b.md", + "media_type": "text/markdown", + "sequence_no": 1, + "char_start": 0, + "char_end_exclusive": 15, + "text": "alpha beta note", + "score": 1.0 + } + ], + "semantic_artifact_chunk_summary": { + "requested": true, + "scope": { + "kind": "task", + "task_id": "33333333-3333-3333-8333-333333333333" + }, + "embedding_config_id": "44444444-4444-4444-8444-444444444444", + "query_vector_dimensions": 3, + "limit": 2, + "searched_artifact_count": 3, + "candidate_count": 3, + "included_count": 2, + "excluded_uningested_artifact_count": 1, + "excluded_limit_count": 1, + "similarity_metric": "cosine_similarity", + "order": ["score_desc", "relative_path_asc", "sequence_no_asc", "id_asc"] + } + } +} +``` + +## example semantic artifact retrieval trace events inside one compile run + +```json +[ + { + "kind": "context.included", + "payload": { + "entity_type": "semantic_artifact_chunk", + "entity_id": "55555555-5555-5555-8555-555555555555", + "reason": "within_semantic_artifact_chunk_limit", + "position": 1, + "scope_kind": "task", + "task_id": "33333333-3333-3333-8333-333333333333", + "task_artifact_id": "66666666-6666-6666-8666-666666666666", + "relative_path": "docs/a.txt", + "media_type": "text/plain", + "ingestion_status": "ingested", + "embedding_config_id": "44444444-4444-4444-8444-444444444444", + "query_vector_dimensions": 3, + "limit": 2, + "similarity_metric": "cosine_similarity", + "score": 1.0, "sequence_no": 1, "char_start": 0, - "char_end_exclusive": 10, - "text": "alpha note", - "score": 1.0 + "char_end_exclusive": 14 } - ], - "summary": { - "embedding_config_id": "bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb", - "query_vector_dimensions": 3, - "limit": 10, - "returned_count": 1, - "searched_artifact_count": 1, - "similarity_metric": "cosine_similarity", - "order": ["score_desc", "relative_path_asc", "sequence_no_asc", "id_asc"], - "scope": { - "kind": "artifact", - "task_id": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa", - "task_artifact_id": "44444444-4444-4444-4444-444444444444" + }, + { + "kind": "context.excluded", + "payload": { + "entity_type": "task_artifact", + "entity_id": "99999999-9999-9999-8999-999999999999", + "reason": "semantic_artifact_not_ingested", + "position": 3, + "scope_kind": "task", + "task_id": "33333333-3333-3333-8333-333333333333", + "task_artifact_id": "99999999-9999-9999-8999-999999999999", + "relative_path": "notes/hidden.txt", + "media_type": "text/plain", + "ingestion_status": "pending", + "embedding_config_id": "44444444-4444-4444-8444-444444444444", + "query_vector_dimensions": 3, + "limit": 2, + "similarity_metric": "cosine_similarity" } } -} +] ``` ## blockers/issues -- No code blocker remained after implementation. -- Integration verification required access to the local Postgres instance because sandboxed localhost TCP connections were blocked. +- No implementation blockers remained. +- Integration verification required local Postgres access outside the default sandbox because sandboxed TCP access to `localhost:5432` is not permitted. ## what remains intentionally deferred to later milestones -- compile-path semantic artifact retrieval -- lexical plus semantic hybrid artifact retrieval -- reranking beyond direct similarity ordering -- query embedding generation through a model or external API +- hybrid lexical-plus-semantic artifact retrieval +- lexical/semantic deduplication or fusion +- reranking across semantic artifact chunks +- model-generated query embeddings - connectors - runner orchestration - UI work ## recommended next step -Adopt this new semantic artifact retrieval primitive in a follow-up sprint that explicitly decides how compile should consume semantic artifact chunks, without combining that change with hybrid retrieval or reranking in the same step. +Implement the follow-up sprint for hybrid compile-path artifact fusion only after agreeing on explicit merge, deduplication, and reranking rules between lexical and semantic artifact sections. diff --git a/REVIEW_REPORT.md b/REVIEW_REPORT.md index ff292f5..ca4f29b 100644 --- a/REVIEW_REPORT.md +++ b/REVIEW_REPORT.md @@ -6,30 +6,18 @@ PASS ## criteria met -- Sprint scope stayed narrow. The code changes remain limited to semantic artifact-chunk retrieval contracts, service logic, store queries, routes, tests, and the build report. -- Typed contracts for task-scoped and artifact-scoped semantic retrieval were added in `apps/api/src/alicebot_api/contracts.py`. -- The retrieval seam requires an explicit `embedding_config_id`, accepts a caller-supplied `query_vector`, validates finite numeric input, and rejects dimension mismatches in `apps/api/src/alicebot_api/semantic_retrieval.py`. -- Retrieval reads only from durable `task_artifact_chunk_embeddings`, `task_artifact_chunks`, and `task_artifacts`, with explicit task or artifact scope and deterministic ordering in `apps/api/src/alicebot_api/store.py`. -- Non-ingested artifacts are excluded from result rows in SQL and from `searched_artifact_count` summaries. -- Minimal API surface was added for both scopes in `apps/api/src/alicebot_api/main.py`: - - `POST /v0/tasks/{task_id}/artifact-chunks/semantic-retrieval` - - `POST /v0/task-artifacts/{task_artifact_id}/chunks/semantic-retrieval` -- Required test coverage is present: - - unit coverage for stable response shape, validation, task scope, artifact scope, and non-ingested behavior in `tests/unit/test_semantic_retrieval.py` - - route coverage in `tests/unit/test_artifacts_main.py` and `tests/unit/test_main.py` - - store-query coverage in `tests/unit/test_task_artifact_chunk_embedding_store.py` - - Postgres-backed integration coverage for deterministic ordering, scoping, empty results, exclusion rules, and per-user isolation in `tests/integration/test_semantic_artifact_chunk_retrieval_api.py` -- Verification already performed during review: - - `./.venv/bin/python -m pytest tests/unit` -> `377 passed in 0.58s` - - `./.venv/bin/python -m pytest tests/integration/test_semantic_artifact_chunk_retrieval_api.py` -> `3 passed in 1.36s` - - `./.venv/bin/python -m pytest tests/integration` -> `114 passed in 36.27s` -- `BUILD_REPORT.md` includes the new contracts, ordering rule, commands run, examples, and deferred scope. -- `ARCHITECTURE.md` now matches Sprint 5H: - - implemented slice updated to Sprint 5H - - semantic artifact-chunk retrieval described as shipped behavior - - semantic artifact retrieval endpoints listed in the runtime inventory - - repo/testing summaries extended through Sprint 5H - - deferred-scope language narrowed to compile-path semantic use, hybrid retrieval, and reranking +- `POST /v0/context/compile` now accepts optional `semantic_artifact_retrieval` input and returns a separate `context_pack.semantic_artifact_chunks` section plus `semantic_artifact_chunk_summary`. +- Compile-path semantic artifact retrieval is backed by durable artifact chunk embedding tables and does not read raw files during compile. +- Validation is enforced for missing embedding configs, query-vector dimension mismatches, and cross-user access. +- Semantic artifact chunks remain separate from lexical artifact chunks and existing memory/entity sections. +- Non-ingested artifacts are excluded from semantic artifact compile results and exclusion decisions are traced. +- Include/exclude decisions for semantic artifact retrieval are persisted in `trace_events`. +- Ordering is deterministic and matches the documented semantic retrieval order: `score_desc`, `relative_path_asc`, `sequence_no_asc`, `id_asc`. +- Scope stayed within the sprint packet: no hybrid lexical/semantic fusion, reranking, connector work, runner orchestration, or UI work was introduced. +- `BUILD_REPORT.md` was updated with contract changes, verification commands, examples, and deferred scope. +- Review verification passed: + - `./.venv/bin/python -m pytest tests/unit` -> `380 passed` + - `./.venv/bin/python -m pytest tests/integration` -> `117 passed` ## criteria missed @@ -37,17 +25,15 @@ PASS ## quality issues -- No blocking implementation, test, or documentation issues remain. +- No blocking quality issues found in the changed scope. ## regression risks -- Low runtime risk. The feature is additive, isolated, and backed by full unit and integration suite passes. -- Low review risk on the follow-up because the only additional change after the prior review was documentation in `ARCHITECTURE.md`. +- Low. The change is narrowly scoped to compile-path semantic artifact retrieval and is covered by unit and Postgres-backed integration tests for ordering, exclusion rules, validation, tracing, and isolation. ## docs issues -- None remaining for Sprint 5H. -- Note: no tests were rerun for the docs-only follow-up, which is appropriate because the code under review did not change after the previously verified green test runs. +- No documentation issues found within sprint scope. ## should anything be added to RULES.md? @@ -55,8 +41,8 @@ PASS ## should anything update ARCHITECTURE.md? -- No. The previously identified architecture drift has been corrected. +- No. ## recommended next action -1. Treat Sprint 5H as review-passed. +- Mark Sprint 5I as review-approved and proceed only with the explicitly deferred follow-up work, starting with a separate sprint for hybrid lexical/semantic artifact behavior if desired. diff --git a/apps/api/src/alicebot_api/compiler.py b/apps/api/src/alicebot_api/compiler.py index 770319c..46a89b1 100644 --- a/apps/api/src/alicebot_api/compiler.py +++ b/apps/api/src/alicebot_api/compiler.py @@ -6,10 +6,13 @@ from alicebot_api.contracts import ( COMPILER_VERSION_V0, ArtifactRetrievalDecisionTracePayload, + CompileContextArtifactScopedSemanticArtifactRetrievalInput, CompilerDecision, CompileContextArtifactRetrievalInput, CompileContextArtifactScopedArtifactRetrievalInput, CompileContextSemanticRetrievalInput, + CompileContextSemanticArtifactRetrievalInput, + CompileContextTaskScopedSemanticArtifactRetrievalInput, CompileContextTaskScopedArtifactRetrievalInput, CompilerRunResult, CompiledContextPack, @@ -19,10 +22,14 @@ ContextPackHybridMemorySummary, ContextPackMemory, ContextPackMemorySummary, + ContextPackSemanticArtifactChunk, + ContextPackSemanticArtifactChunkSummary, HybridMemoryDecisionTracePayload, MemorySelectionSource, SEMANTIC_MEMORY_RETRIEVAL_ORDER, TASK_ARTIFACT_CHUNK_RETRIEVAL_ORDER, + TASK_ARTIFACT_CHUNK_SEMANTIC_RETRIEVAL_ORDER, + SemanticArtifactRetrievalDecisionTracePayload, SemanticMemoryRetrievalRequestInput, TRACE_KIND_CONTEXT_COMPILE, TraceEventRecord, @@ -36,7 +43,13 @@ resolve_artifact_chunk_retrieval_query_terms, retrieve_matching_task_artifact_chunks, ) -from alicebot_api.semantic_retrieval import validate_semantic_memory_retrieval_request +from alicebot_api.semantic_retrieval import ( + retrieve_artifact_scoped_semantic_artifact_chunk_records, + retrieve_task_scoped_semantic_artifact_chunk_records, + serialize_semantic_artifact_chunk_result_item, + validate_semantic_artifact_chunk_retrieval_request, + validate_semantic_memory_retrieval_request, +) from alicebot_api.store import ( ContinuityStore, EntityEdgeRow, @@ -52,6 +65,7 @@ SUMMARY_TRACE_EVENT_KIND = "context.summary" _UNBOUNDED_SEMANTIC_RETRIEVAL_LIMIT = 2_147_483_647 +_UNBOUNDED_SEMANTIC_ARTIFACT_RETRIEVAL_LIMIT = 2_147_483_647 HYBRID_MEMORY_SOURCE_PRECEDENCE: list[MemorySelectionSource] = ["symbolic", "semantic"] HYBRID_SYMBOLIC_ORDER = ["updated_at_asc", "created_at_asc", "id_asc"] @@ -77,6 +91,13 @@ class CompiledArtifactChunkSection: decisions: list[CompilerDecision] +@dataclass(frozen=True, slots=True) +class CompiledSemanticArtifactChunkSection: + items: list[ContextPackSemanticArtifactChunk] + summary: ContextPackSemanticArtifactChunkSummary + decisions: list[CompilerDecision] + + @dataclass(slots=True) class HybridMemoryCandidate: memory: MemoryRow @@ -237,6 +258,23 @@ def _empty_artifact_chunk_summary() -> ContextPackArtifactChunkSummary: } +def _empty_semantic_artifact_chunk_summary() -> ContextPackSemanticArtifactChunkSummary: + return { + "requested": False, + "scope": None, + "embedding_config_id": None, + "query_vector_dimensions": 0, + "limit": 0, + "searched_artifact_count": 0, + "candidate_count": 0, + "included_count": 0, + "excluded_uningested_artifact_count": 0, + "excluded_limit_count": 0, + "similarity_metric": None, + "order": list(TASK_ARTIFACT_CHUNK_SEMANTIC_RETRIEVAL_ORDER), + } + + def _artifact_retrieval_decision_metadata( *, scope_kind: str, @@ -273,6 +311,45 @@ def _artifact_retrieval_decision_metadata( return payload +def _semantic_artifact_retrieval_decision_metadata( + *, + scope_kind: str, + task_id: UUID, + task_artifact_id: UUID, + relative_path: str, + media_type: str | None, + ingestion_status: str, + embedding_config_id: UUID, + query_vector_dimensions: int, + limit: int, + score: float | None = None, + sequence_no: int | None = None, + char_start: int | None = None, + char_end_exclusive: int | None = None, +) -> SemanticArtifactRetrievalDecisionTracePayload: + payload: SemanticArtifactRetrievalDecisionTracePayload = { + "scope_kind": scope_kind, # type: ignore[typeddict-item] + "task_id": str(task_id), + "task_artifact_id": str(task_artifact_id), + "relative_path": relative_path, + "media_type": media_type, + "ingestion_status": ingestion_status, # type: ignore[typeddict-item] + "embedding_config_id": str(embedding_config_id), + "query_vector_dimensions": query_vector_dimensions, + "limit": limit, + "similarity_metric": "cosine_similarity", + } + if score is not None: + payload["score"] = score + if sequence_no is not None: + payload["sequence_no"] = sequence_no + if char_start is not None: + payload["char_start"] = char_start + if char_end_exclusive is not None: + payload["char_end_exclusive"] = char_end_exclusive + return payload + + def _hybrid_memory_decision_metadata( *, embedding_config_id: UUID | None, @@ -687,6 +764,158 @@ def _compile_artifact_chunk_section( ) +def _compile_semantic_artifact_chunk_section( + store: ContinuityStore, + *, + semantic_artifact_retrieval: CompileContextSemanticArtifactRetrievalInput | None, +) -> CompiledSemanticArtifactChunkSection: + if semantic_artifact_retrieval is None: + return CompiledSemanticArtifactChunkSection( + items=[], + summary=_empty_semantic_artifact_chunk_summary(), + decisions=[], + ) + + if isinstance( + semantic_artifact_retrieval, + CompileContextTaskScopedSemanticArtifactRetrievalInput, + ): + task = store.get_task_optional(semantic_artifact_retrieval.task_id) + if task is None: + raise TaskNotFoundError(f"task {semantic_artifact_retrieval.task_id} was not found") + artifact_rows = store.list_task_artifacts_for_task(semantic_artifact_retrieval.task_id) + scope_kind = "task" + section_payload = retrieve_task_scoped_semantic_artifact_chunk_records( + store, + user_id=task["id"], + request=semantic_artifact_retrieval, + ) + _config, query_vector = validate_semantic_artifact_chunk_retrieval_request( + store, + embedding_config_id=semantic_artifact_retrieval.embedding_config_id, + query_vector=semantic_artifact_retrieval.query_vector, + ) + matched_items = [ + serialize_semantic_artifact_chunk_result_item(row) + for row in store.retrieve_task_scoped_semantic_artifact_chunk_matches( + task_id=semantic_artifact_retrieval.task_id, + embedding_config_id=semantic_artifact_retrieval.embedding_config_id, + query_vector=query_vector, + limit=_UNBOUNDED_SEMANTIC_ARTIFACT_RETRIEVAL_LIMIT, + ) + ] + else: + artifact_row = store.get_task_artifact_optional( + semantic_artifact_retrieval.task_artifact_id + ) + if artifact_row is None: + raise TaskArtifactNotFoundError( + f"task artifact {semantic_artifact_retrieval.task_artifact_id} was not found" + ) + artifact_rows = [artifact_row] + scope_kind = "artifact" + section_payload = retrieve_artifact_scoped_semantic_artifact_chunk_records( + store, + user_id=artifact_row["task_id"], + request=semantic_artifact_retrieval, + ) + _config, query_vector = validate_semantic_artifact_chunk_retrieval_request( + store, + embedding_config_id=semantic_artifact_retrieval.embedding_config_id, + query_vector=semantic_artifact_retrieval.query_vector, + ) + matched_items = [ + serialize_semantic_artifact_chunk_result_item(row) + for row in store.retrieve_artifact_scoped_semantic_artifact_chunk_matches( + task_artifact_id=semantic_artifact_retrieval.task_artifact_id, + embedding_config_id=semantic_artifact_retrieval.embedding_config_id, + query_vector=query_vector, + limit=_UNBOUNDED_SEMANTIC_ARTIFACT_RETRIEVAL_LIMIT, + ) + ] + + included_items = list(section_payload["items"]) + excluded_uningested_artifact_count = 0 + decisions: list[CompilerDecision] = [] + + for position, artifact_row in enumerate(artifact_rows, start=1): + if artifact_row["ingestion_status"] == "ingested": + continue + excluded_uningested_artifact_count += 1 + decisions.append( + CompilerDecision( + "excluded", + "task_artifact", + artifact_row["id"], + "semantic_artifact_not_ingested", + position, + metadata=_semantic_artifact_retrieval_decision_metadata( + scope_kind=scope_kind, + task_id=artifact_row["task_id"], + task_artifact_id=artifact_row["id"], + relative_path=artifact_row["relative_path"], + media_type=infer_task_artifact_media_type(artifact_row), + ingestion_status=artifact_row["ingestion_status"], + embedding_config_id=semantic_artifact_retrieval.embedding_config_id, + query_vector_dimensions=len(query_vector), + limit=semantic_artifact_retrieval.limit, + ), + ) + ) + + for position, item in enumerate(matched_items, start=1): + decision_kind = "included" if position <= semantic_artifact_retrieval.limit else "excluded" + decision_reason = ( + "within_semantic_artifact_chunk_limit" + if position <= semantic_artifact_retrieval.limit + else "semantic_artifact_chunk_limit_exceeded" + ) + decisions.append( + CompilerDecision( + decision_kind, + "semantic_artifact_chunk", + UUID(item["id"]), + decision_reason, + position, + metadata=_semantic_artifact_retrieval_decision_metadata( + scope_kind=scope_kind, + task_id=UUID(item["task_id"]), + task_artifact_id=UUID(item["task_artifact_id"]), + relative_path=item["relative_path"], + media_type=item["media_type"], + ingestion_status="ingested", + embedding_config_id=semantic_artifact_retrieval.embedding_config_id, + query_vector_dimensions=len(query_vector), + limit=semantic_artifact_retrieval.limit, + score=item["score"], + sequence_no=item["sequence_no"], + char_start=item["char_start"], + char_end_exclusive=item["char_end_exclusive"], + ), + ) + ) + + section_summary = section_payload["summary"] + return CompiledSemanticArtifactChunkSection( + items=included_items, + summary={ + "requested": True, + "scope": section_summary["scope"], + "embedding_config_id": section_summary["embedding_config_id"], + "query_vector_dimensions": section_summary["query_vector_dimensions"], + "limit": section_summary["limit"], + "searched_artifact_count": section_summary["searched_artifact_count"], + "candidate_count": len(matched_items), + "included_count": len(included_items), + "excluded_uningested_artifact_count": excluded_uningested_artifact_count, + "excluded_limit_count": max(len(matched_items) - len(included_items), 0), + "similarity_metric": section_summary["similarity_metric"], + "order": list(section_summary["order"]), + }, + decisions=decisions, + ) + + def compile_continuity_context( *, user: UserRow, @@ -699,6 +928,7 @@ def compile_continuity_context( limits: ContextCompilerLimits, memory_section: CompiledMemorySection | None = None, artifact_chunk_section: CompiledArtifactChunkSection | None = None, + semantic_artifact_chunk_section: CompiledSemanticArtifactChunkSection | None = None, ) -> CompilerRunResult: latest_session_sequence: dict[UUID, int] = {} for event in events: @@ -797,6 +1027,15 @@ def compile_continuity_context( decisions=[], ) decisions.extend(resolved_artifact_chunk_section.decisions) + resolved_semantic_artifact_chunk_section = ( + semantic_artifact_chunk_section + or CompiledSemanticArtifactChunkSection( + items=[], + summary=_empty_semantic_artifact_chunk_summary(), + decisions=[], + ) + ) + decisions.extend(resolved_semantic_artifact_chunk_section.decisions) ordered_entities = sorted(entities, key=_entity_sort_key) included_entities = ordered_entities[-limits.max_entities :] if limits.max_entities > 0 else [] included_entity_ids = {entity["id"] for entity in included_entities} @@ -945,6 +1184,26 @@ def compile_continuity_context( "excluded_uningested_artifact_count": resolved_artifact_chunk_section.summary[ "excluded_uningested_artifact_count" ], + "semantic_artifact_retrieval_requested": resolved_semantic_artifact_chunk_section.summary[ + "requested" + ], + "semantic_artifact_retrieval_scope_kind": ( + None + if resolved_semantic_artifact_chunk_section.summary["scope"] is None + else resolved_semantic_artifact_chunk_section.summary["scope"]["kind"] + ), + "semantic_artifact_chunk_candidate_count": resolved_semantic_artifact_chunk_section.summary[ + "candidate_count" + ], + "included_semantic_artifact_chunk_count": resolved_semantic_artifact_chunk_section.summary[ + "included_count" + ], + "excluded_semantic_artifact_chunk_limit_count": resolved_semantic_artifact_chunk_section.summary[ + "excluded_limit_count" + ], + "excluded_semantic_uningested_artifact_count": resolved_semantic_artifact_chunk_section.summary[ + "excluded_uningested_artifact_count" + ], "included_entity_count": len(included_entities), "excluded_entity_count": excluded_entity_limit_count, "excluded_entity_limit_count": excluded_entity_limit_count, @@ -978,6 +1237,8 @@ def compile_continuity_context( "memory_summary": resolved_memory_section.summary, "artifact_chunks": list(resolved_artifact_chunk_section.items), "artifact_chunk_summary": resolved_artifact_chunk_section.summary, + "semantic_artifact_chunks": list(resolved_semantic_artifact_chunk_section.items), + "semantic_artifact_chunk_summary": resolved_semantic_artifact_chunk_section.summary, "entities": [_serialize_entity(entity) for entity in included_entities], "entity_summary": { "candidate_count": len(ordered_entities), @@ -1004,6 +1265,7 @@ def compile_and_persist_trace( limits: ContextCompilerLimits, semantic_retrieval: CompileContextSemanticRetrievalInput | None = None, artifact_retrieval: CompileContextArtifactRetrievalInput | None = None, + semantic_artifact_retrieval: CompileContextSemanticArtifactRetrievalInput | None = None, ) -> CompiledTraceRun: user = store.get_user(user_id) thread = store.get_thread(thread_id) @@ -1020,6 +1282,10 @@ def compile_and_persist_trace( store, artifact_retrieval=artifact_retrieval, ) + semantic_artifact_chunk_section = _compile_semantic_artifact_chunk_section( + store, + semantic_artifact_retrieval=semantic_artifact_retrieval, + ) entities = store.list_entities() ordered_entities = sorted(entities, key=_entity_sort_key) included_entities = ordered_entities[-limits.max_entities :] if limits.max_entities > 0 else [] @@ -1035,6 +1301,7 @@ def compile_and_persist_trace( limits=limits, memory_section=memory_section, artifact_chunk_section=artifact_chunk_section, + semantic_artifact_chunk_section=semantic_artifact_chunk_section, ) trace = store.create_trace( user_id=user_id, diff --git a/apps/api/src/alicebot_api/contracts.py b/apps/api/src/alicebot_api/contracts.py index 07fa214..8d4882b 100644 --- a/apps/api/src/alicebot_api/contracts.py +++ b/apps/api/src/alicebot_api/contracts.py @@ -251,6 +251,46 @@ def as_payload(self) -> JsonObject: ) +@dataclass(frozen=True, slots=True) +class CompileContextTaskScopedSemanticArtifactRetrievalInput: + task_id: UUID + embedding_config_id: UUID + query_vector: tuple[float, ...] + limit: int = DEFAULT_ARTIFACT_CHUNK_RETRIEVAL_LIMIT + + def as_payload(self) -> JsonObject: + return { + "kind": "task", + "task_id": str(self.task_id), + "embedding_config_id": str(self.embedding_config_id), + "query_vector": [float(value) for value in self.query_vector], + "limit": self.limit, + } + + +@dataclass(frozen=True, slots=True) +class CompileContextArtifactScopedSemanticArtifactRetrievalInput: + task_artifact_id: UUID + embedding_config_id: UUID + query_vector: tuple[float, ...] + limit: int = DEFAULT_ARTIFACT_CHUNK_RETRIEVAL_LIMIT + + def as_payload(self) -> JsonObject: + return { + "kind": "artifact", + "task_artifact_id": str(self.task_artifact_id), + "embedding_config_id": str(self.embedding_config_id), + "query_vector": [float(value) for value in self.query_vector], + "limit": self.limit, + } + + +CompileContextSemanticArtifactRetrievalInput: TypeAlias = ( + CompileContextTaskScopedSemanticArtifactRetrievalInput + | CompileContextArtifactScopedSemanticArtifactRetrievalInput +) + + @dataclass(frozen=True, slots=True) class TraceCreate: user_id: UUID @@ -394,6 +434,34 @@ class ContextPackArtifactChunkSummary(TypedDict): order: list[str] +class ContextPackSemanticArtifactChunk(TypedDict): + id: str + task_id: str + task_artifact_id: str + relative_path: str + media_type: str + sequence_no: int + char_start: int + char_end_exclusive: int + text: str + score: float + + +class ContextPackSemanticArtifactChunkSummary(TypedDict): + requested: bool + scope: TaskArtifactChunkRetrievalScope | None + embedding_config_id: str | None + query_vector_dimensions: int + limit: int + searched_artifact_count: int + candidate_count: int + included_count: int + excluded_uningested_artifact_count: int + excluded_limit_count: int + similarity_metric: Literal["cosine_similarity"] | None + order: list[str] + + class ArtifactRetrievalDecisionTracePayload(TypedDict): scope_kind: TaskArtifactChunkRetrievalScopeKind task_id: str @@ -410,6 +478,23 @@ class ArtifactRetrievalDecisionTracePayload(TypedDict): char_end_exclusive: NotRequired[int] +class SemanticArtifactRetrievalDecisionTracePayload(TypedDict): + scope_kind: TaskArtifactChunkRetrievalScopeKind + task_id: str + task_artifact_id: str + relative_path: str + media_type: str | None + ingestion_status: TaskArtifactIngestionStatus + embedding_config_id: str + query_vector_dimensions: int + limit: int + similarity_metric: Literal["cosine_similarity"] + score: NotRequired[float] + sequence_no: NotRequired[int] + char_start: NotRequired[int] + char_end_exclusive: NotRequired[int] + + class ContextPackMemorySummary(TypedDict): candidate_count: int included_count: int @@ -495,6 +580,8 @@ class CompiledContextPack(TypedDict): memory_summary: ContextPackMemorySummary artifact_chunks: list[ContextPackArtifactChunk] artifact_chunk_summary: ContextPackArtifactChunkSummary + semantic_artifact_chunks: list[ContextPackSemanticArtifactChunk] + semantic_artifact_chunk_summary: ContextPackSemanticArtifactChunkSummary entities: list[ContextPackEntity] entity_summary: ContextPackEntitySummary entity_edges: list[ContextPackEntityEdge] diff --git a/apps/api/src/alicebot_api/main.py b/apps/api/src/alicebot_api/main.py index fb487e6..e530778 100644 --- a/apps/api/src/alicebot_api/main.py +++ b/apps/api/src/alicebot_api/main.py @@ -16,8 +16,11 @@ ApprovalRejectInput, ApprovalRequestCreateInput, ArtifactScopedSemanticArtifactChunkRetrievalInput, + CompileContextArtifactScopedSemanticArtifactRetrievalInput, CompileContextArtifactScopedArtifactRetrievalInput, + CompileContextSemanticArtifactRetrievalInput, CompileContextTaskScopedArtifactRetrievalInput, + CompileContextTaskScopedSemanticArtifactRetrievalInput, ConsentStatus, ConsentUpsertInput, CompileContextSemanticRetrievalInput, @@ -291,6 +294,41 @@ class CompileContextArtifactScopedArtifactRetrievalRequest(BaseModel): ] +class CompileContextTaskScopedSemanticArtifactRetrievalRequest(BaseModel): + model_config = ConfigDict(extra="forbid") + + kind: Literal["task"] + task_id: UUID + embedding_config_id: UUID + query_vector: list[float] = Field(min_length=1, max_length=20000) + limit: int = Field( + default=DEFAULT_ARTIFACT_CHUNK_RETRIEVAL_LIMIT, + ge=1, + le=MAX_ARTIFACT_CHUNK_RETRIEVAL_LIMIT, + ) + + +class CompileContextArtifactScopedSemanticArtifactRetrievalRequest(BaseModel): + model_config = ConfigDict(extra="forbid") + + kind: Literal["artifact"] + task_artifact_id: UUID + embedding_config_id: UUID + query_vector: list[float] = Field(min_length=1, max_length=20000) + limit: int = Field( + default=DEFAULT_ARTIFACT_CHUNK_RETRIEVAL_LIMIT, + ge=1, + le=MAX_ARTIFACT_CHUNK_RETRIEVAL_LIMIT, + ) + + +CompileContextSemanticArtifactRetrievalRequest = Annotated[ + CompileContextTaskScopedSemanticArtifactRetrievalRequest + | CompileContextArtifactScopedSemanticArtifactRetrievalRequest, + Field(discriminator="kind"), +] + + class CompileContextRequest(BaseModel): user_id: UUID thread_id: UUID @@ -301,6 +339,7 @@ class CompileContextRequest(BaseModel): max_entity_edges: int = Field(default=DEFAULT_MAX_ENTITY_EDGES, ge=0, le=100) semantic: CompileContextSemanticRequest | None = None artifact_retrieval: CompileContextArtifactRetrievalRequest | None = None + semantic_artifact_retrieval: CompileContextSemanticArtifactRetrievalRequest | None = None class GenerateResponseRequest(BaseModel): @@ -616,6 +655,7 @@ def healthcheck() -> JSONResponse: def compile_context(request: CompileContextRequest) -> JSONResponse: settings = get_settings() artifact_retrieval = None + semantic_artifact_retrieval = None if isinstance(request.artifact_retrieval, CompileContextTaskScopedArtifactRetrievalRequest): artifact_retrieval = CompileContextTaskScopedArtifactRetrievalInput( task_id=request.artifact_retrieval.task_id, @@ -631,6 +671,28 @@ def compile_context(request: CompileContextRequest) -> JSONResponse: query=request.artifact_retrieval.query, limit=request.artifact_retrieval.limit, ) + if isinstance( + request.semantic_artifact_retrieval, + CompileContextTaskScopedSemanticArtifactRetrievalRequest, + ): + semantic_artifact_retrieval = CompileContextTaskScopedSemanticArtifactRetrievalInput( + task_id=request.semantic_artifact_retrieval.task_id, + embedding_config_id=request.semantic_artifact_retrieval.embedding_config_id, + query_vector=tuple(request.semantic_artifact_retrieval.query_vector), + limit=request.semantic_artifact_retrieval.limit, + ) + elif isinstance( + request.semantic_artifact_retrieval, + CompileContextArtifactScopedSemanticArtifactRetrievalRequest, + ): + semantic_artifact_retrieval = ( + CompileContextArtifactScopedSemanticArtifactRetrievalInput( + task_artifact_id=request.semantic_artifact_retrieval.task_artifact_id, + embedding_config_id=request.semantic_artifact_retrieval.embedding_config_id, + query_vector=tuple(request.semantic_artifact_retrieval.query_vector), + limit=request.semantic_artifact_retrieval.limit, + ) + ) try: with user_connection(settings.database_url, request.user_id) as conn: @@ -655,9 +717,12 @@ def compile_context(request: CompileContextRequest) -> JSONResponse: ) ), artifact_retrieval=artifact_retrieval, + semantic_artifact_retrieval=semantic_artifact_retrieval, ) except TaskArtifactChunkRetrievalValidationError as exc: return JSONResponse(status_code=400, content={"detail": str(exc)}) + except SemanticArtifactChunkRetrievalValidationError as exc: + return JSONResponse(status_code=400, content={"detail": str(exc)}) except SemanticMemoryRetrievalValidationError as exc: return JSONResponse(status_code=400, content={"detail": str(exc)}) except (TaskNotFoundError, TaskArtifactNotFoundError) as exc: diff --git a/apps/api/src/alicebot_api/response_generation.py b/apps/api/src/alicebot_api/response_generation.py index 7652a5d..78f2ee6 100644 --- a/apps/api/src/alicebot_api/response_generation.py +++ b/apps/api/src/alicebot_api/response_generation.py @@ -90,6 +90,10 @@ def _context_section_payload(context_pack: CompiledContextPack) -> JsonObject: "sessions": context_pack["sessions"], "memories": context_pack["memories"], "memory_summary": context_pack["memory_summary"], + "artifact_chunks": context_pack["artifact_chunks"], + "artifact_chunk_summary": context_pack["artifact_chunk_summary"], + "semantic_artifact_chunks": context_pack["semantic_artifact_chunks"], + "semantic_artifact_chunk_summary": context_pack["semantic_artifact_chunk_summary"], "entities": context_pack["entities"], "entity_summary": context_pack["entity_summary"], "entity_edges": context_pack["entity_edges"], diff --git a/apps/api/src/alicebot_api/semantic_retrieval.py b/apps/api/src/alicebot_api/semantic_retrieval.py index 4fe066d..50059a1 100644 --- a/apps/api/src/alicebot_api/semantic_retrieval.py +++ b/apps/api/src/alicebot_api/semantic_retrieval.py @@ -143,7 +143,7 @@ def _build_task_artifact_chunk_retrieval_scope( return scope -def _serialize_semantic_artifact_chunk_result_item( +def serialize_semantic_artifact_chunk_result_item( row: TaskArtifactChunkSemanticRetrievalRow, ) -> TaskArtifactChunkSemanticRetrievalItem: return { @@ -220,7 +220,7 @@ def retrieve_task_scoped_semantic_artifact_chunk_records( query_vector=request.query_vector, ) items = [ - _serialize_semantic_artifact_chunk_result_item(row) + serialize_semantic_artifact_chunk_result_item(row) for row in store.retrieve_task_scoped_semantic_artifact_chunk_matches( task_id=request.task_id, embedding_config_id=request.embedding_config_id, @@ -264,7 +264,7 @@ def retrieve_artifact_scoped_semantic_artifact_chunk_records( query_vector=request.query_vector, ) items = [ - _serialize_semantic_artifact_chunk_result_item(row) + serialize_semantic_artifact_chunk_result_item(row) for row in store.retrieve_artifact_scoped_semantic_artifact_chunk_matches( task_artifact_id=request.task_artifact_id, embedding_config_id=request.embedding_config_id, diff --git a/tests/integration/test_context_compile.py b/tests/integration/test_context_compile.py index 4b6913b..cc43cd5 100644 --- a/tests/integration/test_context_compile.py +++ b/tests/integration/test_context_compile.py @@ -278,6 +278,23 @@ def seed_memory_embedding_for_user( ) +def seed_task_artifact_chunk_embedding_for_user( + database_url: str, + *, + user_id: UUID, + task_artifact_chunk_id: UUID, + embedding_config_id: UUID, + vector: list[float], +) -> None: + with user_connection(database_url, user_id) as conn: + ContinuityStore(conn).create_task_artifact_chunk_embedding( + task_artifact_chunk_id=task_artifact_chunk_id, + embedding_config_id=embedding_config_id, + dimensions=len(vector), + vector=vector, + ) + + def seed_compile_artifact_scope( database_url: str, *, @@ -468,6 +485,21 @@ def test_compile_context_endpoint_persists_trace_and_trace_events(migrated_datab "semantic_order": ["score_desc", "created_at_asc", "id_asc"], }, } + assert payload["context_pack"]["semantic_artifact_chunks"] == [] + assert payload["context_pack"]["semantic_artifact_chunk_summary"] == { + "requested": False, + "scope": None, + "embedding_config_id": None, + "query_vector_dimensions": 0, + "limit": 0, + "searched_artifact_count": 0, + "candidate_count": 0, + "included_count": 0, + "excluded_uningested_artifact_count": 0, + "excluded_limit_count": 0, + "similarity_metric": None, + "order": ["score_desc", "relative_path_asc", "sequence_no_asc", "id_asc"], + } assert payload["context_pack"]["entities"] == [ { "id": str(included_entity["id"]), @@ -576,6 +608,11 @@ def test_compile_context_endpoint_persists_trace_and_trace_events(migrated_datab assert trace_events[-1]["payload"]["hybrid_memory_candidate_count"] == 2 assert trace_events[-1]["payload"]["hybrid_memory_merged_candidate_count"] == 1 assert trace_events[-1]["payload"]["hybrid_memory_deduplicated_count"] == 0 + assert trace_events[-1]["payload"]["semantic_artifact_retrieval_requested"] is False + assert trace_events[-1]["payload"]["semantic_artifact_chunk_candidate_count"] == 0 + assert trace_events[-1]["payload"]["included_semantic_artifact_chunk_count"] == 0 + assert trace_events[-1]["payload"]["excluded_semantic_artifact_chunk_limit_count"] == 0 + assert trace_events[-1]["payload"]["excluded_semantic_uningested_artifact_count"] == 0 assert trace_events[-1]["payload"]["included_entity_count"] == 1 assert trace_events[-1]["payload"]["excluded_entity_limit_count"] == 2 assert trace_events[-1]["payload"]["included_entity_edge_count"] == 1 @@ -654,6 +691,21 @@ def test_compile_context_prefers_updated_active_memory_within_same_transaction( "semantic_order": ["score_desc", "created_at_asc", "id_asc"], }, } + assert payload["context_pack"]["semantic_artifact_chunks"] == [] + assert payload["context_pack"]["semantic_artifact_chunk_summary"] == { + "requested": False, + "scope": None, + "embedding_config_id": None, + "query_vector_dimensions": 0, + "limit": 0, + "searched_artifact_count": 0, + "candidate_count": 0, + "included_count": 0, + "excluded_uningested_artifact_count": 0, + "excluded_limit_count": 0, + "similarity_metric": None, + "order": ["score_desc", "relative_path_asc", "sequence_no_asc", "id_asc"], + } assert payload["context_pack"]["entity_summary"] == { "candidate_count": 2, "included_count": 1, @@ -1134,6 +1186,353 @@ def test_compile_context_artifact_scoped_retrieval_returns_only_visible_artifact assert trace_events[-1]["payload"]["excluded_uningested_artifact_count"] == 0 +def test_compile_context_semantic_artifact_retrieval_integrates_chunks_traces_and_exclusion_rules( + migrated_database_urls, + monkeypatch, +) -> None: + seeded = seed_traceable_thread(migrated_database_urls["app"]) + artifact_scope = seed_compile_artifact_scope( + migrated_database_urls["app"], + user_id=seeded["user_id"], + thread_id=seeded["thread_id"], + ) + config_id = seed_embedding_config_for_user( + migrated_database_urls["app"], + user_id=seeded["user_id"], + ) + seed_task_artifact_chunk_embedding_for_user( + migrated_database_urls["app"], + user_id=seeded["user_id"], + task_artifact_chunk_id=artifact_scope["chunk_ids"]["docs"], + embedding_config_id=config_id, + vector=[1.0, 0.0, 0.0], + ) + seed_task_artifact_chunk_embedding_for_user( + migrated_database_urls["app"], + user_id=seeded["user_id"], + task_artifact_chunk_id=artifact_scope["chunk_ids"]["notes"], + embedding_config_id=config_id, + vector=[1.0, 0.0, 0.0], + ) + seed_task_artifact_chunk_embedding_for_user( + migrated_database_urls["app"], + user_id=seeded["user_id"], + task_artifact_chunk_id=artifact_scope["chunk_ids"]["weak"], + embedding_config_id=config_id, + vector=[0.0, 1.0, 0.0], + ) + monkeypatch.setattr( + main_module, + "get_settings", + lambda: Settings(database_url=migrated_database_urls["app"]), + ) + + status_code, payload = invoke_compile_context( + { + "user_id": str(seeded["user_id"]), + "thread_id": str(seeded["thread_id"]), + "semantic_artifact_retrieval": { + "kind": "task", + "task_id": str(artifact_scope["task_id"]), + "embedding_config_id": str(config_id), + "query_vector": [1.0, 0.0, 0.0], + "limit": 2, + }, + } + ) + + assert status_code == 200 + assert payload["context_pack"]["semantic_artifact_chunks"] == [ + { + "id": str(artifact_scope["chunk_ids"]["docs"]), + "task_id": str(artifact_scope["task_id"]), + "task_artifact_id": str(artifact_scope["artifact_ids"]["docs"]), + "relative_path": "docs/a.txt", + "media_type": "text/plain", + "sequence_no": 1, + "char_start": 0, + "char_end_exclusive": 14, + "text": "beta alpha doc", + "score": 1.0, + }, + { + "id": str(artifact_scope["chunk_ids"]["notes"]), + "task_id": str(artifact_scope["task_id"]), + "task_artifact_id": str(artifact_scope["artifact_ids"]["notes"]), + "relative_path": "notes/b.md", + "media_type": "text/markdown", + "sequence_no": 1, + "char_start": 0, + "char_end_exclusive": 15, + "text": "alpha beta note", + "score": 1.0, + }, + ] + assert payload["context_pack"]["semantic_artifact_chunk_summary"] == { + "requested": True, + "scope": {"kind": "task", "task_id": str(artifact_scope["task_id"])}, + "embedding_config_id": str(config_id), + "query_vector_dimensions": 3, + "limit": 2, + "searched_artifact_count": 3, + "candidate_count": 3, + "included_count": 2, + "excluded_uningested_artifact_count": 1, + "excluded_limit_count": 1, + "similarity_metric": "cosine_similarity", + "order": ["score_desc", "relative_path_asc", "sequence_no_asc", "id_asc"], + } + assert payload["context_pack"]["artifact_chunks"] == [] + + trace_id = UUID(payload["trace_id"]) + with user_connection(migrated_database_urls["app"], seeded["user_id"]) as conn: + trace_events = ContinuityStore(conn).list_trace_events(trace_id) + + assert any( + event["payload"]["reason"] == "within_semantic_artifact_chunk_limit" + and event["payload"]["entity_id"] == str(artifact_scope["chunk_ids"]["docs"]) + and event["payload"]["relative_path"] == "docs/a.txt" + and event["payload"]["score"] == 1.0 + for event in trace_events + if event["kind"] == "context.included" + ) + assert any( + event["payload"]["reason"] == "within_semantic_artifact_chunk_limit" + and event["payload"]["entity_id"] == str(artifact_scope["chunk_ids"]["notes"]) + and event["payload"]["relative_path"] == "notes/b.md" + for event in trace_events + if event["kind"] == "context.included" + ) + assert any( + event["payload"]["reason"] == "semantic_artifact_chunk_limit_exceeded" + and event["payload"]["entity_id"] == str(artifact_scope["chunk_ids"]["weak"]) + and event["payload"]["relative_path"] == "notes/c.txt" + and event["payload"]["score"] == 0.0 + for event in trace_events + if event["kind"] == "context.excluded" + ) + assert any( + event["payload"]["reason"] == "semantic_artifact_not_ingested" + and event["payload"]["entity_id"] == str(artifact_scope["artifact_ids"]["pending"]) + and event["payload"]["relative_path"] == "notes/hidden.txt" + and event["payload"]["ingestion_status"] == "pending" + for event in trace_events + if event["kind"] == "context.excluded" + ) + assert trace_events[-1]["payload"]["semantic_artifact_retrieval_requested"] is True + assert trace_events[-1]["payload"]["semantic_artifact_retrieval_scope_kind"] == "task" + assert trace_events[-1]["payload"]["semantic_artifact_chunk_candidate_count"] == 3 + assert trace_events[-1]["payload"]["included_semantic_artifact_chunk_count"] == 2 + assert trace_events[-1]["payload"]["excluded_semantic_artifact_chunk_limit_count"] == 1 + assert trace_events[-1]["payload"]["excluded_semantic_uningested_artifact_count"] == 1 + + +def test_compile_context_semantic_artifact_scoped_retrieval_returns_only_visible_artifact_chunks( + migrated_database_urls, + monkeypatch, +) -> None: + seeded = seed_traceable_thread(migrated_database_urls["app"]) + artifact_scope = seed_compile_artifact_scope( + migrated_database_urls["app"], + user_id=seeded["user_id"], + thread_id=seeded["thread_id"], + ) + config_id = seed_embedding_config_for_user( + migrated_database_urls["app"], + user_id=seeded["user_id"], + ) + seed_task_artifact_chunk_embedding_for_user( + migrated_database_urls["app"], + user_id=seeded["user_id"], + task_artifact_chunk_id=artifact_scope["chunk_ids"]["notes"], + embedding_config_id=config_id, + vector=[1.0, 0.0, 0.0], + ) + monkeypatch.setattr( + main_module, + "get_settings", + lambda: Settings(database_url=migrated_database_urls["app"]), + ) + + status_code, payload = invoke_compile_context( + { + "user_id": str(seeded["user_id"]), + "thread_id": str(seeded["thread_id"]), + "semantic_artifact_retrieval": { + "kind": "artifact", + "task_artifact_id": str(artifact_scope["artifact_ids"]["notes"]), + "embedding_config_id": str(config_id), + "query_vector": [1.0, 0.0, 0.0], + "limit": 2, + }, + } + ) + + assert status_code == 200 + assert payload["context_pack"]["semantic_artifact_chunks"] == [ + { + "id": str(artifact_scope["chunk_ids"]["notes"]), + "task_id": str(artifact_scope["task_id"]), + "task_artifact_id": str(artifact_scope["artifact_ids"]["notes"]), + "relative_path": "notes/b.md", + "media_type": "text/markdown", + "sequence_no": 1, + "char_start": 0, + "char_end_exclusive": 15, + "text": "alpha beta note", + "score": 1.0, + } + ] + assert payload["context_pack"]["semantic_artifact_chunk_summary"] == { + "requested": True, + "scope": { + "kind": "artifact", + "task_id": str(artifact_scope["task_id"]), + "task_artifact_id": str(artifact_scope["artifact_ids"]["notes"]), + }, + "embedding_config_id": str(config_id), + "query_vector_dimensions": 3, + "limit": 2, + "searched_artifact_count": 1, + "candidate_count": 1, + "included_count": 1, + "excluded_uningested_artifact_count": 0, + "excluded_limit_count": 0, + "similarity_metric": "cosine_similarity", + "order": ["score_desc", "relative_path_asc", "sequence_no_asc", "id_asc"], + } + + trace_id = UUID(payload["trace_id"]) + with user_connection(migrated_database_urls["app"], seeded["user_id"]) as conn: + trace_events = ContinuityStore(conn).list_trace_events(trace_id) + + assert any( + event["payload"]["reason"] == "within_semantic_artifact_chunk_limit" + and event["payload"]["entity_id"] == str(artifact_scope["chunk_ids"]["notes"]) + and event["payload"]["scope_kind"] == "artifact" + and event["payload"]["task_artifact_id"] == str(artifact_scope["artifact_ids"]["notes"]) + for event in trace_events + if event["kind"] == "context.included" + ) + assert trace_events[-1]["payload"]["semantic_artifact_retrieval_requested"] is True + assert trace_events[-1]["payload"]["semantic_artifact_retrieval_scope_kind"] == "artifact" + assert trace_events[-1]["payload"]["semantic_artifact_chunk_candidate_count"] == 1 + assert trace_events[-1]["payload"]["included_semantic_artifact_chunk_count"] == 1 + assert trace_events[-1]["payload"]["excluded_semantic_artifact_chunk_limit_count"] == 0 + assert trace_events[-1]["payload"]["excluded_semantic_uningested_artifact_count"] == 0 + + +def test_compile_context_semantic_artifact_retrieval_validation_and_isolation( + migrated_database_urls, + monkeypatch, +) -> None: + owner = seed_traceable_thread(migrated_database_urls["app"]) + intruder = seed_traceable_thread( + migrated_database_urls["app"], + email="intruder@example.com", + display_name="Intruder", + ) + owner_artifact_scope = seed_compile_artifact_scope( + migrated_database_urls["app"], + user_id=owner["user_id"], + thread_id=owner["thread_id"], + ) + owner_config_id = seed_embedding_config_for_user( + migrated_database_urls["app"], + user_id=owner["user_id"], + ) + monkeypatch.setattr( + main_module, + "get_settings", + lambda: Settings(database_url=migrated_database_urls["app"]), + ) + + invalid_shape_status, invalid_shape_payload = invoke_compile_context( + { + "user_id": str(owner["user_id"]), + "thread_id": str(owner["thread_id"]), + "semantic_artifact_retrieval": { + "kind": "task", + "task_artifact_id": str(owner_artifact_scope["artifact_ids"]["docs"]), + "embedding_config_id": str(owner_config_id), + "query_vector": [1.0, 0.0, 0.0], + }, + } + ) + missing_status, missing_payload = invoke_compile_context( + { + "user_id": str(owner["user_id"]), + "thread_id": str(owner["thread_id"]), + "semantic_artifact_retrieval": { + "kind": "task", + "task_id": str(owner_artifact_scope["task_id"]), + "embedding_config_id": str(uuid4()), + "query_vector": [1.0, 0.0, 0.0], + "limit": 2, + }, + } + ) + mismatch_status, mismatch_payload = invoke_compile_context( + { + "user_id": str(owner["user_id"]), + "thread_id": str(owner["thread_id"]), + "semantic_artifact_retrieval": { + "kind": "task", + "task_id": str(owner_artifact_scope["task_id"]), + "embedding_config_id": str(owner_config_id), + "query_vector": [1.0, 0.0], + "limit": 2, + }, + } + ) + isolated_task_status, isolated_task_payload = invoke_compile_context( + { + "user_id": str(intruder["user_id"]), + "thread_id": str(intruder["thread_id"]), + "semantic_artifact_retrieval": { + "kind": "task", + "task_id": str(owner_artifact_scope["task_id"]), + "embedding_config_id": str(owner_config_id), + "query_vector": [1.0, 0.0, 0.0], + "limit": 2, + }, + } + ) + isolated_artifact_status, isolated_artifact_payload = invoke_compile_context( + { + "user_id": str(intruder["user_id"]), + "thread_id": str(intruder["thread_id"]), + "semantic_artifact_retrieval": { + "kind": "artifact", + "task_artifact_id": str(owner_artifact_scope["artifact_ids"]["docs"]), + "embedding_config_id": str(owner_config_id), + "query_vector": [1.0, 0.0, 0.0], + "limit": 2, + }, + } + ) + + assert invalid_shape_status == 422 + assert "task_id" in json.dumps(invalid_shape_payload) + assert missing_status == 400 + assert missing_payload["detail"].startswith( + "embedding_config_id must reference an existing embedding config owned by the user" + ) + assert mismatch_status == 400 + assert mismatch_payload["detail"] == "query_vector length must match embedding config dimensions (3): 2" + assert isolated_task_status == 404 + assert isolated_task_payload == { + "detail": f"task {owner_artifact_scope['task_id']} was not found" + } + assert isolated_artifact_status == 404 + assert isolated_artifact_payload == { + "detail": ( + "task artifact " + f"{owner_artifact_scope['artifact_ids']['docs']} was not found" + ) + } + + def test_compile_context_artifact_retrieval_validation_and_isolation( migrated_database_urls, monkeypatch, diff --git a/tests/unit/test_compiler.py b/tests/unit/test_compiler.py index e0c2cae..7ff19c9 100644 --- a/tests/unit/test_compiler.py +++ b/tests/unit/test_compiler.py @@ -7,10 +7,13 @@ SUMMARY_TRACE_EVENT_KIND, _compile_artifact_chunk_section, _compile_memory_section, + _compile_semantic_artifact_chunk_section, compile_continuity_context, ) from alicebot_api.contracts import ( + CompileContextArtifactScopedSemanticArtifactRetrievalInput, CompileContextSemanticRetrievalInput, + CompileContextTaskScopedSemanticArtifactRetrievalInput, CompileContextTaskScopedArtifactRetrievalInput, ContextCompilerLimits, ) @@ -313,6 +316,21 @@ def test_compile_continuity_context_is_deterministic_and_stably_ordered() -> Non "id_asc", ], } + assert first_run.context_pack["semantic_artifact_chunks"] == [] + assert first_run.context_pack["semantic_artifact_chunk_summary"] == { + "requested": False, + "scope": None, + "embedding_config_id": None, + "query_vector_dimensions": 0, + "limit": 0, + "searched_artifact_count": 0, + "candidate_count": 0, + "included_count": 0, + "excluded_uningested_artifact_count": 0, + "excluded_limit_count": 0, + "similarity_metric": None, + "order": ["score_desc", "relative_path_asc", "sequence_no_asc", "id_asc"], + } assert first_run.context_pack["entity_summary"] == { "candidate_count": 3, "included_count": 2, @@ -643,6 +661,21 @@ def test_compile_continuity_context_records_included_and_excluded_reasons() -> N "id_asc", ], } + assert compiler_run.context_pack["semantic_artifact_chunks"] == [] + assert compiler_run.context_pack["semantic_artifact_chunk_summary"] == { + "requested": False, + "scope": None, + "embedding_config_id": None, + "query_vector_dimensions": 0, + "limit": 0, + "searched_artifact_count": 0, + "candidate_count": 0, + "included_count": 0, + "excluded_uningested_artifact_count": 0, + "excluded_limit_count": 0, + "similarity_metric": None, + "order": ["score_desc", "relative_path_asc", "sequence_no_asc", "id_asc"], + } assert compiler_run.context_pack["entities"] == [ { "id": str(kept_entity_id), @@ -681,6 +714,14 @@ def test_compile_continuity_context_records_included_and_excluded_reasons() -> N assert compiler_run.trace_events[-1].payload["included_artifact_chunk_count"] == 0 assert compiler_run.trace_events[-1].payload["excluded_artifact_chunk_limit_count"] == 0 assert compiler_run.trace_events[-1].payload["excluded_uningested_artifact_count"] == 0 + assert compiler_run.trace_events[-1].payload["semantic_artifact_retrieval_requested"] is False + assert compiler_run.trace_events[-1].payload["semantic_artifact_chunk_candidate_count"] == 0 + assert compiler_run.trace_events[-1].payload["included_semantic_artifact_chunk_count"] == 0 + assert ( + compiler_run.trace_events[-1].payload["excluded_semantic_artifact_chunk_limit_count"] + == 0 + ) + assert compiler_run.trace_events[-1].payload["excluded_semantic_uningested_artifact_count"] == 0 class SemanticCompileStoreStub: @@ -745,9 +786,15 @@ def list_memory_embeddings_for_config(self, embedding_config_id): class ArtifactCompileStoreStub: def __init__(self) -> None: self.base_time = datetime(2026, 3, 14, 12, 0, tzinfo=UTC) + self.config_id = uuid4() self.task_id = uuid4() self.artifact_ids = [uuid4(), uuid4(), uuid4(), uuid4()] - self.chunk_ids = [uuid4(), uuid4(), uuid4()] + self.chunk_ids = [uuid4(), uuid4(), uuid4(), uuid4()] + + def get_embedding_config_optional(self, embedding_config_id): + if embedding_config_id != self.config_id: + return None + return {"id": self.config_id, "dimensions": 3} def get_task_optional(self, task_id): if task_id != self.task_id: @@ -845,6 +892,97 @@ def list_task_artifact_chunks(self, task_artifact_id): ] return [] + def get_task_artifact_optional(self, task_artifact_id): + for artifact_row in self.list_task_artifacts_for_task(self.task_id): + if artifact_row["id"] == task_artifact_id: + return artifact_row + return None + + def retrieve_task_scoped_semantic_artifact_chunk_matches( + self, + *, + task_id, + embedding_config_id, + query_vector, + limit, + ): + assert task_id == self.task_id + assert embedding_config_id == self.config_id + assert query_vector == [1.0, 0.0, 0.0] + rows = [ + { + "id": self.chunk_ids[0], + "user_id": uuid4(), + "task_id": self.task_id, + "task_artifact_id": self.artifact_ids[0], + "relative_path": "docs/a.txt", + "media_type_hint": "text/plain", + "sequence_no": 1, + "char_start": 0, + "char_end_exclusive": 14, + "text": "beta alpha doc", + "created_at": self.base_time, + "updated_at": self.base_time, + "embedding_config_id": self.config_id, + "score": 1.0, + }, + { + "id": self.chunk_ids[1], + "user_id": uuid4(), + "task_id": self.task_id, + "task_artifact_id": self.artifact_ids[1], + "relative_path": "notes/b.md", + "media_type_hint": "text/markdown", + "sequence_no": 1, + "char_start": 0, + "char_end_exclusive": 15, + "text": "alpha beta note", + "created_at": self.base_time + timedelta(minutes=1), + "updated_at": self.base_time + timedelta(minutes=1), + "embedding_config_id": self.config_id, + "score": 1.0, + }, + { + "id": self.chunk_ids[3], + "user_id": uuid4(), + "task_id": self.task_id, + "task_artifact_id": self.artifact_ids[3], + "relative_path": "notes/c.txt", + "media_type_hint": "text/plain", + "sequence_no": 1, + "char_start": 0, + "char_end_exclusive": 9, + "text": "beta only", + "created_at": self.base_time + timedelta(minutes=3), + "updated_at": self.base_time + timedelta(minutes=3), + "embedding_config_id": self.config_id, + "score": 0.25, + }, + ] + return list(rows[:limit]) + + def retrieve_artifact_scoped_semantic_artifact_chunk_matches( + self, + *, + task_artifact_id, + embedding_config_id, + query_vector, + limit, + ): + assert embedding_config_id == self.config_id + assert query_vector == [1.0, 0.0, 0.0] + rows = [ + row + for row in self.retrieve_task_scoped_semantic_artifact_chunk_matches( + task_id=self.task_id, + embedding_config_id=embedding_config_id, + query_vector=query_vector, + limit=10, + ) + if row["task_artifact_id"] == task_artifact_id + ] + return list(rows[:limit]) + def test_compile_artifact_chunk_section_orders_limits_and_excludes_non_ingested() -> None: store = ArtifactCompileStoreStub() @@ -922,6 +1060,117 @@ def test_compile_artifact_chunk_section_orders_limits_and_excludes_non_ingested( assert artifact_section.decisions[-1].metadata["relative_path"] == "notes/c.txt" +def test_compile_semantic_artifact_chunk_section_orders_limits_and_excludes_non_ingested() -> None: + store = ArtifactCompileStoreStub() + + semantic_artifact_section = _compile_semantic_artifact_chunk_section( + store, # type: ignore[arg-type] + semantic_artifact_retrieval=CompileContextTaskScopedSemanticArtifactRetrievalInput( + task_id=store.task_id, + embedding_config_id=store.config_id, + query_vector=(1.0, 0.0, 0.0), + limit=2, + ), + ) + + assert semantic_artifact_section.items == [ + { + "id": str(store.chunk_ids[0]), + "task_id": str(store.task_id), + "task_artifact_id": str(store.artifact_ids[0]), + "relative_path": "docs/a.txt", + "media_type": "text/plain", + "sequence_no": 1, + "char_start": 0, + "char_end_exclusive": 14, + "text": "beta alpha doc", + "score": 1.0, + }, + { + "id": str(store.chunk_ids[1]), + "task_id": str(store.task_id), + "task_artifact_id": str(store.artifact_ids[1]), + "relative_path": "notes/b.md", + "media_type": "text/markdown", + "sequence_no": 1, + "char_start": 0, + "char_end_exclusive": 15, + "text": "alpha beta note", + "score": 1.0, + }, + ] + assert semantic_artifact_section.summary == { + "requested": True, + "scope": {"kind": "task", "task_id": str(store.task_id)}, + "embedding_config_id": str(store.config_id), + "query_vector_dimensions": 3, + "limit": 2, + "searched_artifact_count": 3, + "candidate_count": 3, + "included_count": 2, + "excluded_uningested_artifact_count": 1, + "excluded_limit_count": 1, + "similarity_metric": "cosine_similarity", + "order": ["score_desc", "relative_path_asc", "sequence_no_asc", "id_asc"], + } + assert [decision.reason for decision in semantic_artifact_section.decisions] == [ + "semantic_artifact_not_ingested", + "within_semantic_artifact_chunk_limit", + "within_semantic_artifact_chunk_limit", + "semantic_artifact_chunk_limit_exceeded", + ] + assert semantic_artifact_section.decisions[0].metadata["relative_path"] == "notes/hidden.txt" + assert semantic_artifact_section.decisions[-1].metadata["relative_path"] == "notes/c.txt" + + +def test_compile_semantic_artifact_chunk_section_supports_artifact_scope() -> None: + store = ArtifactCompileStoreStub() + + semantic_artifact_section = _compile_semantic_artifact_chunk_section( + store, # type: ignore[arg-type] + semantic_artifact_retrieval=CompileContextArtifactScopedSemanticArtifactRetrievalInput( + task_artifact_id=store.artifact_ids[1], + embedding_config_id=store.config_id, + query_vector=(1.0, 0.0, 0.0), + limit=2, + ), + ) + + assert semantic_artifact_section.items == [ + { + "id": str(store.chunk_ids[1]), + "task_id": str(store.task_id), + "task_artifact_id": str(store.artifact_ids[1]), + "relative_path": "notes/b.md", + "media_type": "text/markdown", + "sequence_no": 1, + "char_start": 0, + "char_end_exclusive": 15, + "text": "alpha beta note", + "score": 1.0, + } + ] + assert semantic_artifact_section.summary == { + "requested": True, + "scope": { + "kind": "artifact", + "task_id": str(store.task_id), + "task_artifact_id": str(store.artifact_ids[1]), + }, + "embedding_config_id": str(store.config_id), + "query_vector_dimensions": 3, + "limit": 2, + "searched_artifact_count": 1, + "candidate_count": 1, + "included_count": 1, + "excluded_uningested_artifact_count": 0, + "excluded_limit_count": 0, + "similarity_metric": "cosine_similarity", + "order": ["score_desc", "relative_path_asc", "sequence_no_asc", "id_asc"], + } + assert semantic_artifact_section.decisions[0].metadata["scope_kind"] == "artifact" + + def test_compile_memory_section_orders_limits_and_excludes_deleted() -> None: store = SemanticCompileStoreStub() deleted_memory = { diff --git a/tests/unit/test_main.py b/tests/unit/test_main.py index 0b3441d..c8e63b8 100644 --- a/tests/unit/test_main.py +++ b/tests/unit/test_main.py @@ -21,7 +21,10 @@ from alicebot_api.entity_edge import EntityEdgeValidationError from alicebot_api.memory import MemoryAdmissionValidationError, MemoryReviewNotFoundError from alicebot_api.response_generation import ResponseFailure -from alicebot_api.semantic_retrieval import SemanticMemoryRetrievalValidationError +from alicebot_api.semantic_retrieval import ( + SemanticArtifactChunkRetrievalValidationError, + SemanticMemoryRetrievalValidationError, +) from alicebot_api.store import ContinuityStoreInvariantError @@ -198,6 +201,7 @@ def fake_compile_and_persist_trace( limits, semantic_retrieval, artifact_retrieval, + semantic_artifact_retrieval, ): captured["store_type"] = type(store).__name__ captured["user_id"] = user_id @@ -205,6 +209,7 @@ def fake_compile_and_persist_trace( captured["limits"] = limits captured["semantic_retrieval"] = semantic_retrieval captured["artifact_retrieval"] = artifact_retrieval + captured["semantic_artifact_retrieval"] = semantic_artifact_retrieval return CompiledTraceRun( trace_id="trace-123", trace_event_count=5, @@ -288,6 +293,21 @@ def fake_compile_and_persist_trace( "id_asc", ], }, + "semantic_artifact_chunks": [], + "semantic_artifact_chunk_summary": { + "requested": False, + "scope": None, + "embedding_config_id": None, + "query_vector_dimensions": 0, + "limit": 0, + "searched_artifact_count": 0, + "candidate_count": 0, + "included_count": 0, + "excluded_uningested_artifact_count": 0, + "excluded_limit_count": 0, + "similarity_metric": None, + "order": ["score_desc", "relative_path_asc", "sequence_no_asc", "id_asc"], + }, "entities": [ { "id": "entity-123", @@ -423,6 +443,21 @@ def fake_compile_and_persist_trace( "id_asc", ], }, + "semantic_artifact_chunks": [], + "semantic_artifact_chunk_summary": { + "requested": False, + "scope": None, + "embedding_config_id": None, + "query_vector_dimensions": 0, + "limit": 0, + "searched_artifact_count": 0, + "candidate_count": 0, + "included_count": 0, + "excluded_uningested_artifact_count": 0, + "excluded_limit_count": 0, + "similarity_metric": None, + "order": ["score_desc", "relative_path_asc", "sequence_no_asc", "id_asc"], + }, "entities": [ { "id": "entity-123", @@ -468,6 +503,7 @@ def fake_compile_and_persist_trace( assert captured["limits"].max_entity_edges == 6 assert captured["semantic_retrieval"] is None assert captured["artifact_retrieval"] is None + assert captured["semantic_artifact_retrieval"] is None def test_compile_context_returns_not_found_when_scope_row_is_missing(monkeypatch) -> None: @@ -518,6 +554,7 @@ def fake_compile_and_persist_trace( limits, semantic_retrieval, artifact_retrieval, + semantic_artifact_retrieval, ): captured["store_type"] = type(store).__name__ captured["user_id"] = user_id @@ -525,6 +562,7 @@ def fake_compile_and_persist_trace( captured["limits"] = limits captured["semantic_retrieval"] = semantic_retrieval captured["artifact_retrieval"] = artifact_retrieval + captured["semantic_artifact_retrieval"] = semantic_artifact_retrieval return CompiledTraceRun( trace_id="trace-semantic", trace_event_count=7, @@ -628,6 +666,34 @@ def fake_compile_and_persist_trace( "id_asc", ], }, + "semantic_artifact_chunks": [ + { + "id": "semantic-chunk-123", + "task_id": "task-123", + "task_artifact_id": "artifact-123", + "relative_path": "docs/spec.txt", + "media_type": "text/plain", + "sequence_no": 1, + "char_start": 0, + "char_end_exclusive": 16, + "text": "alpha beta spec", + "score": 0.99, + } + ], + "semantic_artifact_chunk_summary": { + "requested": True, + "scope": {"kind": "task", "task_id": "task-123"}, + "embedding_config_id": str(config_id), + "query_vector_dimensions": 3, + "limit": 2, + "searched_artifact_count": 1, + "candidate_count": 1, + "included_count": 1, + "excluded_uningested_artifact_count": 0, + "excluded_limit_count": 0, + "similarity_metric": "cosine_similarity", + "order": ["score_desc", "relative_path_asc", "sequence_no_asc", "id_asc"], + }, "entities": [], "entity_summary": { "candidate_count": 0, @@ -663,6 +729,15 @@ def fake_compile_and_persist_trace( query="alpha beta", limit=2, ), + semantic_artifact_retrieval=( + main_module.CompileContextTaskScopedSemanticArtifactRetrievalRequest( + kind="task", + task_id=uuid4(), + embedding_config_id=config_id, + query_vector=[0.1, 0.2, 0.3], + limit=2, + ) + ), ) ) @@ -692,6 +767,10 @@ def fake_compile_and_persist_trace( assert captured["artifact_retrieval"].task_id is not None assert captured["artifact_retrieval"].query == "alpha beta" assert captured["artifact_retrieval"].limit == 2 + assert captured["semantic_artifact_retrieval"].task_id is not None + assert captured["semantic_artifact_retrieval"].embedding_config_id == config_id + assert captured["semantic_artifact_retrieval"].query_vector == (0.1, 0.2, 0.3) + assert captured["semantic_artifact_retrieval"].limit == 2 monkeypatch.setattr( main_module, @@ -720,6 +799,37 @@ def fake_compile_and_persist_trace( "detail": "embedding_config_id must reference an existing embedding config owned by the user" } + monkeypatch.setattr( + main_module, + "compile_and_persist_trace", + lambda *_args, **_kwargs: (_ for _ in ()).throw( + SemanticArtifactChunkRetrievalValidationError( + "query_vector length must match embedding config dimensions (3): 2" + ) + ), + ) + + semantic_artifact_error_response = main_module.compile_context( + main_module.CompileContextRequest( + user_id=user_id, + thread_id=thread_id, + semantic_artifact_retrieval=( + main_module.CompileContextTaskScopedSemanticArtifactRetrievalRequest( + kind="task", + task_id=uuid4(), + embedding_config_id=config_id, + query_vector=[0.1, 0.2], + limit=2, + ) + ), + ) + ) + + assert semantic_artifact_error_response.status_code == 400 + assert json.loads(semantic_artifact_error_response.body) == { + "detail": "query_vector length must match embedding config dimensions (3): 2" + } + def test_compile_context_request_rejects_invalid_artifact_scope_shape() -> None: with pytest.raises(Exception) as exc_info: @@ -736,6 +846,22 @@ def test_compile_context_request_rejects_invalid_artifact_scope_shape() -> None: assert "task_id" in str(exc_info.value) +def test_compile_context_request_rejects_invalid_semantic_artifact_scope_shape() -> None: + with pytest.raises(Exception) as exc_info: + main_module.CompileContextRequest( + user_id=uuid4(), + thread_id=uuid4(), + semantic_artifact_retrieval={ + "kind": "task", + "task_artifact_id": str(uuid4()), + "embedding_config_id": str(uuid4()), + "query_vector": [0.1, 0.2, 0.3], + }, + ) + + assert "task_id" in str(exc_info.value) + + def test_generate_assistant_response_returns_assistant_and_trace_payload(monkeypatch) -> None: user_id = uuid4() thread_id = uuid4() diff --git a/tests/unit/test_response_generation.py b/tests/unit/test_response_generation.py index f91c051..59cbd40 100644 --- a/tests/unit/test_response_generation.py +++ b/tests/unit/test_response_generation.py @@ -88,6 +88,42 @@ def make_context_pack() -> dict[str, object]: "semantic_order": ["score_desc", "created_at_asc", "id_asc"], }, }, + "artifact_chunks": [], + "artifact_chunk_summary": { + "requested": False, + "scope": None, + "query": None, + "query_terms": [], + "matching_rule": "casefolded_unicode_word_overlap_unique_query_terms_v1", + "limit": 0, + "searched_artifact_count": 0, + "candidate_count": 0, + "included_count": 0, + "excluded_uningested_artifact_count": 0, + "excluded_limit_count": 0, + "order": [ + "matched_query_term_count_desc", + "first_match_char_start_asc", + "relative_path_asc", + "sequence_no_asc", + "id_asc", + ], + }, + "semantic_artifact_chunks": [], + "semantic_artifact_chunk_summary": { + "requested": False, + "scope": None, + "embedding_config_id": None, + "query_vector_dimensions": 0, + "limit": 0, + "searched_artifact_count": 0, + "candidate_count": 0, + "included_count": 0, + "excluded_uningested_artifact_count": 0, + "excluded_limit_count": 0, + "similarity_metric": None, + "order": ["score_desc", "relative_path_asc", "sequence_no_asc", "id_asc"], + }, "entities": [], "entity_summary": { "candidate_count": 0,