From 13e4b8e55276ae56cea9b0ebff68ee00f60b0620 Mon Sep 17 00:00:00 2001 From: Sami Rusani Date: Sat, 14 Mar 2026 22:28:51 +0100 Subject: [PATCH] Sprint 5F: artifact chunk compile integration --- .ai/active/SPRINT_PACKET.md | 115 +++--- ARCHITECTURE.md | 32 +- BUILD_REPORT.md | 404 ++++++++++++--------- REVIEW_REPORT.md | 48 +-- apps/api/src/alicebot_api/compiler.py | 228 ++++++++++++ apps/api/src/alicebot_api/contracts.py | 86 ++++- apps/api/src/alicebot_api/main.py | 63 +++- tests/integration/test_context_compile.py | 424 ++++++++++++++++++++++ tests/unit/test_compiler.py | 234 +++++++++++- tests/unit/test_main.py | 132 ++++++- 10 files changed, 1499 insertions(+), 267 deletions(-) diff --git a/.ai/active/SPRINT_PACKET.md b/.ai/active/SPRINT_PACKET.md index 2fa5e70..e8e8691 100644 --- a/.ai/active/SPRINT_PACKET.md +++ b/.ai/active/SPRINT_PACKET.md @@ -2,7 +2,7 @@ ## Sprint Title -Sprint 5E: Artifact Chunk Retrieval V0 +Sprint 5F: Artifact Chunk Compile Integration V0 ## Sprint Type @@ -10,55 +10,59 @@ feature ## Sprint Reason -Milestone 5 now has deterministic workspace boundaries, explicit artifact records, and durable chunk ingestion. The next safe step is to retrieve those ingested chunks through a narrow deterministic read path before adding embeddings, ranking, rich-document parsing, connectors, or UI. +Milestone 5 now has deterministic workspace boundaries, explicit artifact records, local text-artifact ingestion, and lexical chunk retrieval. The next safe step is to make those retrieved chunks available to the existing context compiler so document-aware responses can build on durable artifact data instead of isolated read APIs. ## Sprint Intent -Add a narrow retrieval seam over existing `task_artifact_chunks` so clients can request relevant ingested text chunks for one task or artifact using deterministic lexical matching only, without yet adding embeddings, compile-path integration, connectors, or UI. +Extend the existing context-compile path so it can optionally retrieve and include relevant artifact chunks using the shipped lexical artifact-chunk retrieval seam, without yet adding embeddings, semantic retrieval, Gmail/Calendar connectors, or UI. ## Git Instructions -- Branch Name: `codex/sprint-5e-artifact-chunk-retrieval-v0` +- Branch Name: `codex/sprint-5f-artifact-chunk-compile-integration-v0` - Base Branch: `main` - PR Strategy: one sprint branch, one PR, no stacked PRs unless Control Tower explicitly opens a follow-up sprint - Merge Policy: squash merge only after reviewer `PASS` and explicit Control Tower merge approval ## Why This Sprint -- Sprint 5A established rooted task-workspace provisioning. -- Sprint 5C established explicit task-artifact registration. -- Sprint 5D established deterministic local text-artifact ingestion into durable chunk rows. -- The next narrow Milestone 5 seam is retrieval over those persisted chunks only, so later document-aware context work can build on a stable read contract instead of raw file access. +- Sprint 5A shipped deterministic rooted task-workspace provisioning. +- Sprint 5C shipped explicit task-artifact registration. +- Sprint 5D shipped deterministic local artifact ingestion into durable chunk rows. +- Sprint 5E shipped deterministic lexical retrieval over those chunk rows. +- The next narrow Milestone 5 seam is compile-path integration of those persisted chunk results only, so document-aware context can land without jumping into semantic retrieval, connector work, or richer parsing. 
## In Scope - Define typed contracts for: - - artifact-chunk retrieval requests - - artifact-chunk retrieval result items - - retrieval summary metadata -- Implement a narrow retrieval seam that: - - searches only durable `task_artifact_chunks` - - scopes retrieval by the current user plus one explicit task or one explicit artifact - - accepts one explicit text query - - uses deterministic lexical matching only - - returns deterministic ordered chunk results with explicit match metadata - - excludes artifacts that are not yet ingested -- Implement the minimal API or service paths needed for: - - retrieving chunks for one task - - retrieving chunks for one artifact when the caller wants a narrower scope + - optional artifact-retrieval input on compile requests + - artifact-chunk result items inside the compiled context pack + - artifact-retrieval summary metadata inside compile responses + - artifact-retrieval trace payloads +- Extend the compile path so it can: + - accept an explicit artifact retrieval request scoped to one visible task or one visible artifact + - reuse the existing lexical artifact-chunk retrieval seam + - include retrieved artifact chunks in a separate context-pack section + - record artifact chunk include/exclude decisions in `trace_events` + - preserve deterministic output for the same stored data and inputs +- Ensure compile behavior: + - leaves current continuity, memory, entity, and other context sections intact + - does not merge artifact chunks with memory/entity sections + - excludes non-ingested artifacts + - scopes strictly by user ownership + - uses deterministic ordering and explicit per-section limits - Add unit and integration tests for: - - deterministic retrieval ordering - - scoped retrieval by task and by artifact - - empty-result behavior + - compile request validation for artifact retrieval input + - deterministic artifact-chunk section ordering - exclusion of non-ingested artifacts - - per-user isolation - - stable response shape + - trace logging for included and excluded artifact chunks + - per-user isolation through the compile path + - response-shape stability for the new artifact-chunk section ## Out of Scope - No embeddings for artifact chunks. -- No semantic retrieval or reranking. -- No compile-path integration of artifact chunks yet. +- No semantic retrieval or reranking for artifact chunks. +- No compile-path merge between artifact chunks and memory/entity sections. - No PDF, DOCX, OCR, or rich document parsing beyond the already-shipped text ingestion seam. - No Gmail or Calendar connector scope. - No runner-style orchestration. @@ -66,58 +70,59 @@ Add a narrow retrieval seam over existing `task_artifact_chunks` so clients can ## Required Deliverables -- Stable chunk-retrieval request and response contracts. -- Minimal deterministic lexical retrieval path over existing `task_artifact_chunks`. -- Unit and integration coverage for ordering, scoping, exclusion rules, and isolation. +- Stable compile-request and compile-response contract updates for artifact chunk retrieval input and output. +- Compile-path integration with the existing lexical artifact-chunk retrieval seam. +- Trace coverage for artifact retrieval decisions inside compile runs. +- Unit and integration coverage for compile-path artifact behavior, ordering, exclusion rules, and isolation. - Updated `BUILD_REPORT.md` with exact verification results and explicit deferred scope. 
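As a rough sketch of the seam these deliverables describe (illustrative only: the field names follow this packet's scope and the contracts that land later in this patch, and all UUIDs are placeholders), exactly one explicit retrieval scope rides along on a compile request:

```python
# Illustrative payloads for the optional artifact_retrieval input.
# One explicit scope per compile request: task-scoped or artifact-scoped.
task_scoped = {
    "kind": "task",
    "task_id": "33333333-3333-3333-3333-333333333333",  # placeholder UUID
    "query": "alpha beta",
    "limit": 5,
}
artifact_scoped = {
    "kind": "artifact",
    "task_artifact_id": "66666666-6666-6666-6666-666666666666",  # placeholder
    "query": "alpha beta",
    "limit": 5,
}
compile_request = {
    "user_id": "11111111-1111-1111-1111-111111111111",    # placeholder
    "thread_id": "22222222-2222-2222-2222-222222222222",  # placeholder
    "artifact_retrieval": task_scoped,  # or artifact_scoped, never both
}
```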
## Acceptance Criteria -- A client can retrieve relevant ingested chunk records for one visible task using one explicit text query. -- A client can retrieve relevant ingested chunk records for one visible artifact using one explicit text query. -- Retrieval uses only durable `task_artifact_chunks` rows already persisted in the repo. -- Retrieval excludes artifacts whose ingestion is not complete. -- Result ordering is deterministic and documented. +- `POST /v0/context/compile` can optionally accept artifact retrieval input and return a separate artifact-chunk section in the context pack. +- Compile-path artifact retrieval uses only durable `task_artifact_chunks` rows already persisted in the repo. +- Non-ingested artifacts are excluded from compile-path artifact results. +- Artifact include/exclude decisions are persisted in `trace_events`. +- Result ordering is deterministic within the artifact-chunk section. - `./.venv/bin/python -m pytest tests/unit` passes. - `./.venv/bin/python -m pytest tests/integration` passes. -- No embeddings, semantic retrieval, compile integration, connector, runner, UI, or broader side-effect scope enters the sprint. +- No embeddings, semantic retrieval, connector, runner, UI, or broader side-effect scope enters the sprint. ## Implementation Constraints -- Keep retrieval narrow and boring. -- Reuse existing task-artifact and chunk seams; do not read raw files during retrieval. -- Use deterministic lexical matching only in this sprint. -- Keep scope explicit: one task or one artifact per request. -- Do not merge artifact-chunk retrieval into the main context compiler in the same sprint. +- Keep compile integration narrow and boring. +- Reuse the existing artifact retrieval seam; do not read raw files during compile. +- Keep artifact chunks in a separate response section from memory/entity context. +- Do not introduce semantic retrieval, embeddings, or ranking in this sprint. +- Keep scope explicit: one task or one artifact retrieval scope per compile request. ## Suggested Work Breakdown -1. Define chunk-retrieval request and response contracts. -2. Implement deterministic lexical matching over existing chunk rows. -3. Add explicit task-scoped and artifact-scoped retrieval paths. -4. Enforce exclusion of non-ingested artifacts and current-user isolation. +1. Define compile contract updates for optional artifact retrieval input and output. +2. Integrate the existing lexical artifact-chunk retrieval seam into the compile path. +3. Add artifact result summaries and trace-event payloads. +4. Preserve current context sections while adding a separate artifact-chunk section. 5. Add unit and integration tests. 6. Update `BUILD_REPORT.md` with executed verification. 
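To make the response-shape criterion above concrete: when no artifact retrieval is requested, the compiled pack still carries the artifact section with inert defaults rather than omitting it. A minimal sketch of that invariant, assuming the summary fields land as the build report later in this patch describes:

```python
# Sketch of the stable empty artifact section when retrieval is not requested.
# Field names assume the contract shipped later in this patch.
empty_artifact_section = {
    "artifact_chunks": [],
    "artifact_chunk_summary": {
        "requested": False,
        "scope": None,
        "query": None,
        "query_terms": [],
        "included_count": 0,
    },
}
```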
## Build Report Requirements `BUILD_REPORT.md` must include: -- the exact retrieval contracts introduced -- the lexical matching rule and ordering rule used +- the exact compile contract changes introduced +- the artifact retrieval matching and ordering rule used - exact commands run - unit and integration test results -- one example task-scoped retrieval response -- one example artifact-scoped retrieval response +- one example compile request and response showing the artifact-chunk section +- one example of artifact-retrieval trace events inside one compile run - what remains intentionally deferred to later milestones ## Review Focus `REVIEW_REPORT.md` should verify: -- the sprint stayed limited to artifact chunk retrieval over durable chunk rows -- retrieval is deterministic, lexical-only, and scope-limited to one task or one artifact -- ordering, exclusion rules, and isolation are test-backed -- no hidden embeddings, semantic retrieval, compile integration, connector, runner, UI, or broader side-effect scope entered the sprint +- the sprint stayed limited to compile-path artifact chunk integration +- artifact retrieval reuses durable chunk rows and the existing lexical retrieval seam +- ordering, exclusion rules, trace visibility, and isolation are test-backed +- no hidden embeddings, semantic retrieval, connector, runner, UI, or broader side-effect scope entered the sprint ## Exit Condition -This sprint is complete when the repo can retrieve relevant ingested artifact chunks through a deterministic lexical read path scoped to one task or one artifact, verify the full path with Postgres-backed tests, and still defer semantic retrieval, compile integration, and connector work. +This sprint is complete when the repo can optionally include retrieved artifact chunks inside `POST /v0/context/compile`, trace those inclusion decisions, and verify the full path with Postgres-backed tests, while still deferring semantic retrieval, embeddings, connector work, and UI. diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 9899d45..b362714 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -2,16 +2,16 @@ ## Current Implemented Slice -AliceBot now implements the accepted repo slice through Sprint 5D. The shipped backend includes: +AliceBot now implements the accepted repo slice through Sprint 5F. 
The shipped backend includes: - foundation continuity storage over `users`, `threads`, `sessions`, and append-only `events` - deterministic tracing and context compilation over durable continuity, memory, entity, and entity-edge records - governed memory admission, explicit-preference extraction, memory review labels, review queue reads, evaluation summary reads, explicit embedding config and memory-embedding storage, direct semantic retrieval, and deterministic hybrid compile-path memory merge - deterministic prompt assembly and one no-tools response path that persists assistant replies as immutable continuity events - user-scoped consents, policies, policy evaluation, tool registry, allowlist evaluation, tool routing, approval request persistence, approval resolution, approved-only proxy execution through the in-process `proxy.echo` handler, durable execution review, and execution-budget lifecycle plus enforcement -- durable `tasks`, `task_steps`, `task_workspaces`, `task_artifacts`, and `task_artifact_chunks`, deterministic task-step sequencing, explicit task-step transitions, explicit manual continuation with lineage through `parent_step_id`, `source_approval_id`, and `source_execution_id`, explicit `tool_executions.task_step_id` linkage for execution synchronization, deterministic rooted local task-workspace provisioning, explicit rooted local artifact registration, deterministic local text-artifact ingestion into durable chunk rows, and deterministic artifact plus chunk reads +- durable `tasks`, `task_steps`, `task_workspaces`, `task_artifacts`, and `task_artifact_chunks`, deterministic task-step sequencing, explicit task-step transitions, explicit manual continuation with lineage through `parent_step_id`, `source_approval_id`, and `source_execution_id`, explicit `tool_executions.task_step_id` linkage for execution synchronization, deterministic rooted local task-workspace provisioning, explicit rooted local artifact registration, deterministic local text-artifact ingestion into durable chunk rows, deterministic lexical artifact-chunk retrieval over durable chunk rows, and optional compile-path artifact chunk inclusion as a separate context section -The current multi-step boundary is narrow and explicit. Manual continuation is implemented and review-passed. Approval resolution and proxy execution now both use explicit task-step linkage rather than first-step inference. Task workspaces are now implemented only as deterministic rooted local boundaries, and task artifacts are now implemented only as explicit rooted local-file registrations plus narrow deterministic text ingestion under those workspaces. Broader runner-style orchestration, automatic multi-step progression, retrieval over artifact chunks, embeddings, rich-document parsing, connectors, and new side-effect surfaces are still planned later and must not be described as live behavior. +The current multi-step boundary is narrow and explicit. Manual continuation is implemented and review-passed. Approval resolution and proxy execution now both use explicit task-step linkage rather than first-step inference. Task workspaces are now implemented only as deterministic rooted local boundaries, and task artifacts are now implemented only as explicit rooted local-file registrations, narrow deterministic text ingestion under those workspaces, lexical retrieval over persisted chunk rows, and optional compile-path inclusion of retrieved artifact chunks in a separate response section. 
Broader runner-style orchestration, automatic multi-step progression, artifact chunk embeddings and semantic retrieval, rich-document parsing, connectors, and new side-effect surfaces are still planned later and must not be described as live behavior. ## Implemented Now @@ -24,7 +24,7 @@ The current multi-step boundary is narrow and explicit. Manual continuation is i - memory and retrieval: `POST /v0/memories/admit`, `POST /v0/memories/extract-explicit-preferences`, `GET /v0/memories`, `GET /v0/memories/review-queue`, `GET /v0/memories/evaluation-summary`, `POST /v0/memories/semantic-retrieval`, `GET /v0/memories/{memory_id}`, `GET /v0/memories/{memory_id}/revisions`, `POST /v0/memories/{memory_id}/labels`, `GET /v0/memories/{memory_id}/labels` - embeddings and graph seams: `POST /v0/embedding-configs`, `GET /v0/embedding-configs`, `POST /v0/memory-embeddings`, `GET /v0/memories/{memory_id}/embeddings`, `GET /v0/memory-embeddings/{memory_embedding_id}`, `POST /v0/entities`, `GET /v0/entities`, `GET /v0/entities/{entity_id}`, `POST /v0/entity-edges`, `GET /v0/entities/{entity_id}/edges` - governance: `POST /v0/consents`, `GET /v0/consents`, `POST /v0/policies`, `GET /v0/policies`, `GET /v0/policies/{policy_id}`, `POST /v0/policies/evaluate`, `POST /v0/tools`, `GET /v0/tools`, `GET /v0/tools/{tool_id}`, `POST /v0/tools/allowlist/evaluate`, `POST /v0/tools/route`, `POST /v0/approvals/requests`, `GET /v0/approvals`, `GET /v0/approvals/{approval_id}`, `POST /v0/approvals/{approval_id}/approve`, `POST /v0/approvals/{approval_id}/reject`, `POST /v0/approvals/{approval_id}/execute` -- task and execution review: `GET /v0/tasks`, `GET /v0/tasks/{task_id}`, `POST /v0/tasks/{task_id}/workspace`, `GET /v0/task-workspaces`, `GET /v0/task-workspaces/{task_workspace_id}`, `POST /v0/task-workspaces/{task_workspace_id}/artifacts`, `GET /v0/task-artifacts`, `GET /v0/task-artifacts/{task_artifact_id}`, `POST /v0/task-artifacts/{task_artifact_id}/ingest`, `GET /v0/task-artifacts/{task_artifact_id}/chunks`, `GET /v0/tasks/{task_id}/steps`, `GET /v0/task-steps/{task_step_id}`, `POST /v0/tasks/{task_id}/steps`, `POST /v0/task-steps/{task_step_id}/transition`, `POST /v0/execution-budgets`, `GET /v0/execution-budgets`, `GET /v0/execution-budgets/{execution_budget_id}`, `POST /v0/execution-budgets/{execution_budget_id}/deactivate`, `POST /v0/execution-budgets/{execution_budget_id}/supersede`, `GET /v0/tool-executions`, `GET /v0/tool-executions/{execution_id}` +- task and execution review: `GET /v0/tasks`, `GET /v0/tasks/{task_id}`, `POST /v0/tasks/{task_id}/workspace`, `GET /v0/task-workspaces`, `GET /v0/task-workspaces/{task_workspace_id}`, `POST /v0/task-workspaces/{task_workspace_id}/artifacts`, `GET /v0/task-artifacts`, `GET /v0/task-artifacts/{task_artifact_id}`, `POST /v0/task-artifacts/{task_artifact_id}/ingest`, `GET /v0/task-artifacts/{task_artifact_id}/chunks`, `POST /v0/tasks/{task_id}/artifact-chunks/retrieve`, `POST /v0/task-artifacts/{task_artifact_id}/chunks/retrieve`, `GET /v0/tasks/{task_id}/steps`, `GET /v0/task-steps/{task_step_id}`, `POST /v0/tasks/{task_id}/steps`, `POST /v0/task-steps/{task_step_id}/transition`, `POST /v0/execution-budgets`, `GET /v0/execution-budgets`, `GET /v0/execution-budgets/{execution_budget_id}`, `POST /v0/execution-budgets/{execution_budget_id}/deactivate`, `POST /v0/execution-budgets/{execution_budget_id}/supersede`, `GET /v0/tool-executions`, `GET /v0/tool-executions/{execution_id}` - `apps/web` and `workers` remain starter shells only. 
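As a usage sketch only: a compile call that exercises the new optional artifact scope. The endpoint and field names come from the list above; the base URL, the `httpx` client, and all UUIDs are placeholder assumptions for a local dev setup.

```python
import httpx

# Sketch: compile a context pack with task-scoped artifact retrieval.
# Base URL and UUIDs are placeholders; httpx is an assumed client choice.
response = httpx.post(
    "http://localhost:8000/v0/context/compile",
    json={
        "user_id": "11111111-1111-1111-1111-111111111111",
        "thread_id": "22222222-2222-2222-2222-222222222222",
        "artifact_retrieval": {
            "kind": "task",
            "task_id": "33333333-3333-3333-3333-333333333333",
            "query": "alpha beta",
            "limit": 2,
        },
    },
)
pack = response.json()["context_pack"]
# Artifact chunks live in their own section, never merged into memories.
print(pack["artifact_chunk_summary"]["included_count"])
```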
### Data Foundation @@ -57,11 +57,11 @@ The current multi-step boundary is narrow and explicit. Manual continuation is i ### Repo Boundaries In This Slice -- `apps/api`: implemented API, store, contracts, service logic, and migrations for continuity, tracing, memory, embeddings, entities, policies, tools, approvals, proxy execution, execution budgets, tasks, task steps, task workspaces, task artifacts, and narrow local artifact chunk ingestion. +- `apps/api`: implemented API, store, contracts, service logic, and migrations for continuity, tracing, memory, embeddings, entities, policies, tools, approvals, proxy execution, execution budgets, tasks, task steps, task workspaces, task artifacts, deterministic lexical artifact chunk retrieval, and narrow compile-path artifact chunk inclusion. - `apps/web`: minimal shell only; no shipped workflow UI. - `workers`: scaffold only; no background jobs or runner logic are implemented. - `infra`: local development bootstrap assets only. -- `tests`: unit and Postgres-backed integration coverage for the shipped seams above, including Sprint 4O task-step lineage/manual continuation, Sprint 4S step-linked execution synchronization, Sprint 5A task-workspace provisioning, Sprint 5C task-artifact registration, and Sprint 5D local artifact ingestion plus chunk reads. +- `tests`: unit and Postgres-backed integration coverage for the shipped seams above, including Sprint 4O task-step lineage/manual continuation, Sprint 4S step-linked execution synchronization, Sprint 5A task-workspace provisioning, Sprint 5C task-artifact registration, Sprint 5D local artifact ingestion plus chunk reads, Sprint 5E lexical artifact-chunk retrieval, and Sprint 5F compile-path artifact chunk integration. ## Core Flows Implemented Now @@ -70,8 +70,10 @@ The current multi-step boundary is narrow and explicit. Manual continuation is i 1. Accept a user-scoped `POST /v0/context/compile` request. 2. Read durable continuity records in deterministic order. 3. Merge in active memories, entities, and entity edges through the currently shipped symbolic and optional semantic retrieval paths. -4. Persist a `context.compile` trace plus explicit inclusion and exclusion events. -5. Return one deterministic `context_pack` describing scope, limits, selected context, and trace metadata. +4. Optionally retrieve artifact chunks through the existing lexical artifact-chunk retrieval seam, scoped to exactly one visible task or one visible artifact per request. +5. Keep retrieved artifact chunks separate from memory and entity sections, with deterministic per-section limits and ordering. +6. Persist a `context.compile` trace plus explicit inclusion and exclusion events, including artifact chunk include/exclude decisions. +7. Return one deterministic `context_pack` describing scope, limits, selected context, artifact chunk results, and trace metadata. ### Governed Memory And Retrieval @@ -190,6 +192,16 @@ The current multi-step boundary is narrow and explicit. Manual continuation is i 10. If the artifact is already ingested, return the existing artifact and chunk summary without reinserting chunks. 11. `GET /v0/task-artifacts/{task_artifact_id}/chunks` returns visible chunk rows in deterministic `sequence_no ASC, id ASC` order plus stable summary metadata. +### Artifact Chunk Retrieval + +1. Accept a user-scoped retrieval request scoped to exactly one visible task or one visible artifact. +2. 
Normalize the query deterministically by casefolding and extracting unique lexical `\w+` terms in first-occurrence order. +3. Read only persisted `task_artifact_chunks` rows for visible artifacts; compile and retrieval paths do not read raw files. +4. Exclude artifacts whose `ingestion_status != 'ingested'`. +5. Match chunks by lexical query-term overlap and record match metadata including matched query terms and first match offset. +6. Order matches deterministically by matched query term count desc, first match offset asc, relative path asc, sequence no asc, and id asc. +7. Return stable summary metadata describing query terms, scope, searched artifact count, and ordering. + ## Security Model Implemented Now - User-owned continuity, trace, memory, embedding, entity, governance, task, task-step, task-workspace, task-artifact, and task-artifact-chunk tables enforce row-level security. @@ -224,6 +236,8 @@ The current multi-step boundary is narrow and explicit. Manual continuation is i - deterministic line-ending normalization and fixed-window chunk boundaries - invalid UTF-8 rejection - idempotent re-ingestion of already ingested artifacts + - deterministic lexical artifact-chunk retrieval by task and by artifact + - compile-path artifact chunk inclusion, exclusion, ordering, and per-user isolation - task-artifact and task-artifact-chunk per-user isolation - trace visibility for continuation and transition events - user isolation for task and task-step reads and mutations @@ -234,7 +248,7 @@ The current multi-step boundary is narrow and explicit. Manual continuation is i The following areas remain planned later and must not be described as implemented: - runner-style orchestration and automatic multi-step progression beyond the current explicit manual continuation seam -- retrieval over artifact chunks, chunk ranking, and embeddings beyond the current explicit rooted local ingestion boundary +- artifact chunk ranking beyond the current lexical match ordering, plus embeddings and semantic retrieval for artifact chunks - rich document parsing beyond the current narrow UTF-8 text and markdown ingestion boundary - read-only Gmail and Calendar connectors - broader tool proxying and real-world side effects beyond the current no-I/O `proxy.echo` handler diff --git a/BUILD_REPORT.md b/BUILD_REPORT.md index 4a4a636..ad24605 100644 --- a/BUILD_REPORT.md +++ b/BUILD_REPORT.md @@ -2,223 +2,283 @@ ## sprint objective -Implement Sprint 5E: Artifact Chunk Retrieval V0 by adding a narrow, deterministic lexical retrieval path over durable `task_artifact_chunks`, scoped to one visible task or one visible artifact, without adding embeddings, semantic ranking, compile-path integration, connectors, runners, or UI work. +Implement Sprint 5F: Artifact Chunk Compile Integration V0 by extending `POST /v0/context/compile` so it can optionally retrieve durable artifact chunks through the existing lexical retrieval seam, return them in a separate context-pack section, and trace artifact include/exclude decisions without adding embeddings, semantic retrieval, connectors, runners, or UI work. 
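For concreteness, a minimal plain-Python reading of the lexical rule this report documents below (`casefolded_unicode_word_overlap_unique_query_terms_v1`) and its ordering. This is a sketch of the stated rules, not the shipped implementation, and the helper names are invented for illustration.

```python
import re

def normalize_query_terms(query: str) -> list[str]:
    # Casefold, extract \w+ terms, dedupe in first-occurrence order.
    return list(dict.fromkeys(re.findall(r"\w+", query.casefold())))

def match_chunk(chunk_text: str, query_terms: list[str]) -> dict | None:
    # A chunk matches when at least one normalized query term appears
    # in the chunk's own casefolded \w+ term set.
    folded = chunk_text.casefold()
    chunk_terms = set(re.findall(r"\w+", folded))
    matched = [term for term in query_terms if term in chunk_terms]
    if not matched:
        return None
    matched_set = set(matched)
    first = min(
        m.start() for m in re.finditer(r"\w+", folded) if m.group() in matched_set
    )
    return {
        "matched_query_terms": matched,  # normalized query order
        "matched_query_term_count": len(matched),
        "first_match_char_start": first,
    }

def retrieval_sort_key(item: dict) -> tuple:
    # matched_query_term_count_desc, first_match_char_start_asc,
    # relative_path_asc, sequence_no_asc, id_asc
    return (
        -item["match"]["matched_query_term_count"],
        item["match"]["first_match_char_start"],
        item["relative_path"],
        item["sequence_no"],
        item["id"],
    )
```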
## completed work -- Added retrieval contracts in `apps/api/src/alicebot_api/contracts.py`: - - `TaskScopedArtifactChunkRetrievalInput(task_id, query)` - - `ArtifactScopedArtifactChunkRetrievalInput(task_artifact_id, query)` - - `TaskArtifactChunkRetrievalMatch` - - `TaskArtifactChunkRetrievalItem` - - `TaskArtifactChunkRetrievalScope` - - `TaskArtifactChunkRetrievalSummary` - - `TaskArtifactChunkRetrievalResponse` - - `TASK_ARTIFACT_CHUNK_RETRIEVAL_ORDER = ["matched_query_term_count_desc", "first_match_char_start_asc", "relative_path_asc", "sequence_no_asc", "id_asc"]` -- Added retrieval behavior in `apps/api/src/alicebot_api/artifacts.py`: - - explicit query validation requiring at least one lexical word - - query normalization via casefolded unique `\w+` terms in first-occurrence order - - chunk matching against persisted chunk text only - - task-scoped retrieval across ingested artifacts for one visible task - - artifact-scoped retrieval for one visible artifact - - exclusion of artifacts whose `ingestion_status != "ingested"`, even if chunk rows exist - - deterministic response ordering with explicit per-item match metadata -- Added the minimal API routes in `apps/api/src/alicebot_api/main.py`: - - `POST /v0/tasks/{task_id}/artifact-chunks/retrieve` - - `POST /v0/task-artifacts/{task_artifact_id}/chunks/retrieve` -- Added store support in `apps/api/src/alicebot_api/store.py`: - - `list_task_artifacts_for_task(task_id)` +- Added optional compile-request artifact retrieval input with one explicit scope per request: + - `artifact_retrieval.kind = "task"` with `task_id`, `query`, `limit` + - `artifact_retrieval.kind = "artifact"` with `task_artifact_id`, `query`, `limit` +- Added internal typed compile contracts for artifact retrieval: + - `CompileContextTaskScopedArtifactRetrievalInput` + - `CompileContextArtifactScopedArtifactRetrievalInput` + - `CompileContextArtifactRetrievalInput` +- Added compile response contracts for a separate artifact section: + - `context_pack.artifact_chunks` + - `context_pack.artifact_chunk_summary` + - `ArtifactRetrievalDecisionTracePayload` +- Kept the artifact section response-shape stable even when retrieval is not requested: + - `artifact_chunks` returns `[]` + - `artifact_chunk_summary.requested` is `false` +- Integrated compile-time artifact retrieval into `compile_and_persist_trace()` using only durable `task_artifact_chunks` rows and the shipped lexical retrieval seam. +- Preserved existing continuity, memory, entity, and entity-edge behavior unchanged. +- Recorded compile trace decisions for: + - included artifact chunks + - artifact chunks excluded by the compile limit + - artifacts excluded because `ingestion_status != "ingested"` +- Added summary trace fields for artifact retrieval counts and scope kind. 
- Added unit and integration coverage for:
-  - deterministic retrieval ordering
-  - task-scoped retrieval
-  - artifact-scoped retrieval
-  - empty-result behavior
-  - exclusion of non-ingested artifacts
-  - per-user isolation
-  - stable response shape
-
-Exact retrieval contracts introduced:
-
-- Request inputs:
-  - `TaskScopedArtifactChunkRetrievalInput(task_id: UUID, query: str)`
-  - `ArtifactScopedArtifactChunkRetrievalInput(task_artifact_id: UUID, query: str)`
-- Result item:
-  - `id`
-  - `task_id`
-  - `task_artifact_id`
-  - `relative_path`
-  - `media_type`
-  - `sequence_no`
-  - `char_start`
-  - `char_end_exclusive`
-  - `text`
-  - `match = {matched_query_terms, matched_query_term_count, first_match_char_start}`
-- Summary metadata:
-  - `total_count`
-  - `searched_artifact_count`
+  - artifact compile request routing and validation
+  - deterministic artifact chunk ordering
+  - non-ingested artifact exclusion
+  - included and excluded artifact trace events
+  - per-user isolation through the compile path
+  - stable compile response shape with the new section
+
+## incomplete work
+
+- None within Sprint 5F scope.
+
+## files changed
+
+- `apps/api/src/alicebot_api/contracts.py`
+- `apps/api/src/alicebot_api/compiler.py`
+- `apps/api/src/alicebot_api/main.py`
+- `tests/unit/test_compiler.py`
+- `tests/unit/test_main.py`
+- `tests/integration/test_context_compile.py`
+- `BUILD_REPORT.md`
+
+## exact compile contract changes introduced
+
+- `CompileContextRequest` now accepts optional `artifact_retrieval`.
+- `artifact_retrieval` is a discriminated union:
+  - task scope: `{ "kind": "task", "task_id": "<uuid>", "query": "<non-empty text>", "limit": <int> }`
+  - artifact scope: `{ "kind": "artifact", "task_artifact_id": "<uuid>", "query": "<non-empty text>", "limit": <int> }`
+- Artifact retrieval limits:
+  - default: `5`
+  - max: `50`
+- `CompiledContextPack` now includes:
+  - `artifact_chunks: list[ContextPackArtifactChunk]`
+  - `artifact_chunk_summary: ContextPackArtifactChunkSummary`
+- `artifact_chunk_summary` fields:
+  - `requested`
+  - `scope`
  - `query`
  - `query_terms`
  - `matching_rule`
+  - `limit`
+  - `searched_artifact_count`
+  - `candidate_count`
+  - `included_count`
+  - `excluded_uningested_artifact_count`
+  - `excluded_limit_count`
  - `order`
-  - `scope = {kind, task_id, task_artifact_id?}`

-Lexical matching rule used:
+## artifact retrieval matching and ordering rule used

-- Rule id: `casefolded_unicode_word_overlap_unique_query_terms_v1`
+- Matching rule id: `casefolded_unicode_word_overlap_unique_query_terms_v1`
- Query normalization:
-  - casefold the query
-  - extract `\w+` terms
-  - deduplicate in first-occurrence order
-  - reject queries that produce zero terms
-- Chunk match rule:
-  - casefold the stored chunk text
-  - extract `\w+` chunk terms
-  - a chunk matches when at least one normalized query term is present in the chunk term set
-  - `matched_query_terms` are returned in normalized query order
-  - `matched_query_term_count` is the count of distinct matched query terms
-  - `first_match_char_start` is the earliest start offset in the chunk text of any matched term
-
-Ordering rule used:
-
-- `matched_query_term_count` descending
-- `first_match_char_start` ascending
-- `relative_path` ascending
-- `sequence_no` ascending
-- `id` ascending
-
-Example task-scoped retrieval response:
+  - casefold query text
+  - extract unique `\w+` terms in first-occurrence order
+  - reject queries that contain no lexical terms
+- Matching source:
+  - only persisted `task_artifact_chunks` rows attached to visible artifacts
+  - no raw file reads in the
compile path +- Exclusion rule: + - artifacts with `ingestion_status != "ingested"` are excluded from artifact chunk results +- Ordering: + - `matched_query_term_count_desc` + - `first_match_char_start_asc` + - `relative_path_asc` + - `sequence_no_asc` + - `id_asc` +- Compile limit behavior: + - ordering is applied first + - the first `limit` chunk matches are included + - remaining matches are traced as `artifact_chunk_limit_exceeded` + +## example compile request ```json { - "items": [ - { - "id": "11111111-1111-1111-1111-111111111111", - "task_id": "22222222-2222-2222-2222-222222222222", - "task_artifact_id": "33333333-3333-3333-3333-333333333333", - "relative_path": "docs/a.txt", - "media_type": "text/plain", - "sequence_no": 1, - "char_start": 0, - "char_end_exclusive": 14, - "text": "beta alpha doc", - "match": { - "matched_query_terms": ["alpha", "beta"], - "matched_query_term_count": 2, - "first_match_char_start": 0 - } - } - ], - "summary": { - "total_count": 1, - "searched_artifact_count": 1, + "user_id": "11111111-1111-1111-1111-111111111111", + "thread_id": "22222222-2222-2222-2222-222222222222", + "artifact_retrieval": { + "kind": "task", + "task_id": "33333333-3333-3333-3333-333333333333", "query": "Alpha beta", - "query_terms": ["alpha", "beta"], - "matching_rule": "casefolded_unicode_word_overlap_unique_query_terms_v1", - "order": [ - "matched_query_term_count_desc", - "first_match_char_start_asc", - "relative_path_asc", - "sequence_no_asc", - "id_asc" - ], - "scope": { - "kind": "task", - "task_id": "22222222-2222-2222-2222-222222222222" - } + "limit": 2 } } ``` -Example artifact-scoped retrieval response: +## example compile response with artifact section ```json { - "items": [ - { - "id": "44444444-4444-4444-4444-444444444444", - "task_id": "22222222-2222-2222-2222-222222222222", - "task_artifact_id": "55555555-5555-5555-5555-555555555555", - "relative_path": "notes/b.md", - "media_type": "text/markdown", - "sequence_no": 1, - "char_start": 0, - "char_end_exclusive": 15, - "text": "alpha beta note", - "match": { - "matched_query_terms": ["alpha", "beta"], - "matched_query_term_count": 2, - "first_match_char_start": 0 + "trace_id": "44444444-4444-4444-4444-444444444444", + "trace_event_count": 18, + "context_pack": { + "compiler_version": "continuity_v0", + "artifact_chunks": [ + { + "id": "55555555-5555-5555-5555-555555555555", + "task_id": "33333333-3333-3333-3333-333333333333", + "task_artifact_id": "66666666-6666-6666-6666-666666666666", + "relative_path": "docs/a.txt", + "media_type": "text/plain", + "sequence_no": 1, + "char_start": 0, + "char_end_exclusive": 14, + "text": "beta alpha doc", + "match": { + "matched_query_terms": ["alpha", "beta"], + "matched_query_term_count": 2, + "first_match_char_start": 0 + } + }, + { + "id": "77777777-7777-7777-7777-777777777777", + "task_id": "33333333-3333-3333-3333-333333333333", + "task_artifact_id": "88888888-8888-8888-8888-888888888888", + "relative_path": "notes/b.md", + "media_type": "text/markdown", + "sequence_no": 1, + "char_start": 0, + "char_end_exclusive": 15, + "text": "alpha beta note", + "match": { + "matched_query_terms": ["alpha", "beta"], + "matched_query_term_count": 2, + "first_match_char_start": 0 + } } - } - ], - "summary": { - "total_count": 1, - "searched_artifact_count": 1, - "query": "Alpha beta", - "query_terms": ["alpha", "beta"], - "matching_rule": "casefolded_unicode_word_overlap_unique_query_terms_v1", - "order": [ - "matched_query_term_count_desc", - "first_match_char_start_asc", - 
"relative_path_asc", - "sequence_no_asc", - "id_asc" ], - "scope": { - "kind": "artifact", - "task_id": "22222222-2222-2222-2222-222222222222", - "task_artifact_id": "55555555-5555-5555-5555-555555555555" + "artifact_chunk_summary": { + "requested": true, + "scope": { + "kind": "task", + "task_id": "33333333-3333-3333-3333-333333333333" + }, + "query": "Alpha beta", + "query_terms": ["alpha", "beta"], + "matching_rule": "casefolded_unicode_word_overlap_unique_query_terms_v1", + "limit": 2, + "searched_artifact_count": 3, + "candidate_count": 3, + "included_count": 2, + "excluded_uningested_artifact_count": 1, + "excluded_limit_count": 1, + "order": [ + "matched_query_term_count_desc", + "first_match_char_start_asc", + "relative_path_asc", + "sequence_no_asc", + "id_asc" + ] } } } ``` -## incomplete work - -- None within Sprint 5E scope. - -## files changed +## example artifact-retrieval trace events inside one compile run -- `apps/api/src/alicebot_api/artifacts.py` -- `apps/api/src/alicebot_api/contracts.py` -- `apps/api/src/alicebot_api/main.py` -- `apps/api/src/alicebot_api/store.py` -- `tests/integration/test_task_artifacts_api.py` -- `tests/unit/test_artifacts.py` -- `tests/unit/test_artifacts_main.py` -- `tests/unit/test_task_artifact_store.py` -- `BUILD_REPORT.md` +```json +[ + { + "kind": "context.included", + "payload": { + "entity_type": "artifact_chunk", + "entity_id": "55555555-5555-5555-5555-555555555555", + "reason": "within_artifact_chunk_limit", + "position": 1, + "scope_kind": "task", + "task_id": "33333333-3333-3333-3333-333333333333", + "task_artifact_id": "66666666-6666-6666-6666-666666666666", + "relative_path": "docs/a.txt", + "media_type": "text/plain", + "ingestion_status": "ingested", + "limit": 2, + "matched_query_terms": ["alpha", "beta"], + "matched_query_term_count": 2, + "first_match_char_start": 0, + "sequence_no": 1, + "char_start": 0, + "char_end_exclusive": 14 + } + }, + { + "kind": "context.excluded", + "payload": { + "entity_type": "artifact_chunk", + "entity_id": "99999999-9999-9999-9999-999999999999", + "reason": "artifact_chunk_limit_exceeded", + "position": 3, + "scope_kind": "task", + "task_id": "33333333-3333-3333-3333-333333333333", + "task_artifact_id": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa", + "relative_path": "notes/c.txt", + "media_type": "text/plain", + "ingestion_status": "ingested", + "limit": 2, + "matched_query_terms": ["beta"], + "matched_query_term_count": 1, + "first_match_char_start": 0, + "sequence_no": 1, + "char_start": 0, + "char_end_exclusive": 9 + } + }, + { + "kind": "context.excluded", + "payload": { + "entity_type": "task_artifact", + "entity_id": "bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb", + "reason": "artifact_not_ingested", + "position": 3, + "scope_kind": "task", + "task_id": "33333333-3333-3333-3333-333333333333", + "task_artifact_id": "bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb", + "relative_path": "notes/hidden.txt", + "media_type": "text/plain", + "ingestion_status": "pending", + "limit": 2 + } + } +] +``` ## tests run -- `./.venv/bin/python -m pytest tests/unit/test_artifacts.py tests/unit/test_artifacts_main.py tests/unit/test_task_artifact_store.py` - - result: `37 passed in 0.44s` -- `./.venv/bin/python -m pytest tests/unit/test_artifacts.py tests/unit/test_artifacts_main.py` - - result: `35 passed in 0.25s` -- `./.venv/bin/python -m pytest tests/integration/test_task_artifacts_api.py` +- `./.venv/bin/python -m pytest tests/unit/test_compiler.py` + - result: `4 passed in 0.21s` +- `./.venv/bin/python -m pytest 
tests/unit/test_main.py` + - result: `38 passed in 0.39s` +- `./.venv/bin/python -m pytest tests/integration/test_context_compile.py` - sandboxed attempt failed to reach local Postgres on `localhost:5432` with `Operation not permitted` +- `./.venv/bin/python -m pytest tests/integration/test_context_compile.py` + - rerun with local database access: `7 passed in 2.17s` - `./.venv/bin/python -m pytest tests/unit` - - result: `358 passed in 0.56s` + - result: `360 passed in 0.56s` - `./.venv/bin/python -m pytest tests/integration` - - rerun with local access: `105 passed in 29.62s` -- `git diff --check` - - result: passed + - rerun with local database access: `107 passed in 31.19s` ## blockers/issues - No remaining implementation blockers. -- Postgres-backed integration verification required unsandboxed localhost access. After rerun with local access, the full integration suite passed. +- Postgres-backed integration verification required elevated local database access because sandboxed localhost connections were denied. ## recommended next step -Build the next milestone on top of this deterministic read contract by adding richer retrieval quality or compile-path usage in a separate sprint, while keeping those changes explicitly scoped and test-backed. +Use this compile-path artifact section as the only seam for later retrieval upgrades, then add semantic retrieval or richer document handling in a separate sprint without changing this deterministic lexical contract. -## intentionally deferred +## what remains intentionally deferred to later milestones -- Embeddings for artifact chunks -- Semantic retrieval or reranking -- Compile-path integration of artifact chunks +- Artifact chunk embeddings +- Semantic retrieval or reranking for artifact chunks +- Compile-path merging of artifact chunks into memory or entity sections - PDF, DOCX, OCR, or richer document parsing -- Connector work +- Gmail or Calendar connectors - Runner or orchestration work - UI work diff --git a/REVIEW_REPORT.md b/REVIEW_REPORT.md index 83dbbe0..5ee4f13 100644 --- a/REVIEW_REPORT.md +++ b/REVIEW_REPORT.md @@ -6,21 +6,24 @@ PASS ## criteria met -- Retrieval is implemented only over durable `task_artifact_chunks` rows; the new logic in `apps/api/src/alicebot_api/artifacts.py` matches against persisted chunk text and does not read raw files during retrieval. -- Both required scopes are present and tested: - - task-scoped retrieval via `POST /v0/tasks/{task_id}/artifact-chunks/retrieve` - - artifact-scoped retrieval via `POST /v0/task-artifacts/{task_artifact_id}/chunks/retrieve` -- Matching is deterministic and lexical-only: - - query normalization uses casefolded `\w+` extraction with first-occurrence deduplication - - ordering is explicit and stable: matched term count desc, first match start asc, relative path asc, sequence no asc, id asc -- Non-ingested artifacts are excluded even if chunk rows exist. -- Per-user isolation is enforced through the existing user-scoped connection/RLS path and is covered by integration tests. -- Response shape is explicit and stable through the new retrieval contracts in `apps/api/src/alicebot_api/contracts.py`. -- Sprint scope stayed narrow: no embeddings, semantic retrieval, compile-path integration, connectors, runner logic, or UI work entered the implementation. -- `BUILD_REPORT.md` was updated and includes the required contracts, matching/order rules, commands, examples, and deferred scope. 
-- Acceptance test gates passed in this review: - - `./.venv/bin/python -m pytest tests/unit` -> `358 passed in 0.53s` - - `./.venv/bin/python -m pytest tests/integration` -> `105 passed in 29.84s` +- `POST /v0/context/compile` optionally accepts `artifact_retrieval` input and returns a separate `context_pack.artifact_chunks` section plus `artifact_chunk_summary`. +- Compile-path artifact retrieval uses only durable `task_artifact_chunks` rows through the existing lexical retrieval seam in `apps/api/src/alicebot_api/artifacts.py`. +- Non-ingested artifacts are excluded from compile-path artifact results and produce explicit exclusion trace events. +- Artifact include/exclude decisions are persisted in `trace_events`, and compile summary events expose artifact retrieval counters and scope kind. +- Artifact chunk ordering is deterministic and matches the documented order: + - matched query term count desc + - first match start asc + - relative path asc + - sequence no asc + - id asc +- Current continuity, memory, entity, and entity-edge sections remain intact and separate from artifact chunks. +- Task-scoped and artifact-scoped compile retrieval paths are both covered, including artifact-scoped happy-path coverage in `tests/integration/test_context_compile.py`. +- The sprint stayed within scope: no embeddings, semantic retrieval for artifact chunks, connectors, runner work, UI work, or raw-file reads in compile. +- Verification in this review: + - `./.venv/bin/python -m pytest tests/unit` -> `360 passed in 0.59s` + - `./.venv/bin/python -m pytest tests/integration` -> `107 passed in 29.88s` + - `./.venv/bin/python -m pytest tests/integration/test_context_compile.py` -> `8 passed in 2.42s` + - `git diff --check` -> passed ## criteria missed @@ -28,18 +31,16 @@ PASS ## quality issues -- No blocking implementation or test-quality issues found in the sprint code. -- Non-blocking process note: `.ai/active/SPRINT_PACKET.md` is part of the working diff. If that edit came from the Builder, sprint inputs should ideally remain reviewer-controlled so implementation is not changing its own source-of-truth spec. +- No blocking implementation or coverage issues found after the follow-up fixes. ## regression risks -- Low. The change is additive, scoped to artifact retrieval, and covered by unit plus Postgres-backed integration tests. -- Residual risk: retrieval behavior is intentionally simple lexical overlap, so future callers may over-assume ranking quality. That is consistent with the sprint packet and documented as deferred scope, not a defect in this sprint. +- Low. The change remains additive, narrowly scoped to compile-path artifact chunk inclusion, and is covered by unit plus Postgres-backed integration tests for ordering, exclusion, tracing, validation, and isolation. ## docs issues -- No required docs are missing for this sprint. -- No correction needed in `BUILD_REPORT.md` based on this review. +- `BUILD_REPORT.md` is aligned with the implementation and verification. +- `ARCHITECTURE.md` now reflects the shipped boundary through Sprint 5F and no longer misstates artifact retrieval as unimplemented. ## should anything be added to RULES.md? @@ -47,9 +48,8 @@ PASS ## should anything update ARCHITECTURE.md? -- No immediate update required for sprint acceptance. The architecture impact is narrow and already understandable from the code plus `BUILD_REPORT.md`. +- No further update is required for sprint acceptance. 
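For readers replaying this verification, a sketch of the kind of ordering assertion the suite is credited with above. This is illustrative only; the helper name is invented and this is not the shipped test code.

```python
def assert_artifact_chunks_ordered(chunks: list[dict]) -> None:
    # Recompute the documented sort key and confirm the section is sorted.
    keys = [
        (
            -c["match"]["matched_query_term_count"],
            c["match"]["first_match_char_start"],
            c["relative_path"],
            c["sequence_no"],
            c["id"],
        )
        for c in chunks
    ]
    assert keys == sorted(keys)
```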
## recommended next action -- Mark Sprint 5E as accepted and move to the next milestone in a separate sprint. -- If desired, tighten process hygiene by keeping `SPRINT_PACKET.md` outside Builder-owned changes unless Control Tower explicitly includes packet editing in scope. +- Mark Sprint 5F accepted and proceed to the next milestone in a separate sprint. diff --git a/apps/api/src/alicebot_api/compiler.py b/apps/api/src/alicebot_api/compiler.py index 3626eed..770319c 100644 --- a/apps/api/src/alicebot_api/compiler.py +++ b/apps/api/src/alicebot_api/compiler.py @@ -5,10 +5,16 @@ from alicebot_api.contracts import ( COMPILER_VERSION_V0, + ArtifactRetrievalDecisionTracePayload, CompilerDecision, + CompileContextArtifactRetrievalInput, + CompileContextArtifactScopedArtifactRetrievalInput, CompileContextSemanticRetrievalInput, + CompileContextTaskScopedArtifactRetrievalInput, CompilerRunResult, CompiledContextPack, + ContextPackArtifactChunk, + ContextPackArtifactChunkSummary, ContextCompilerLimits, ContextPackHybridMemorySummary, ContextPackMemory, @@ -16,11 +22,20 @@ HybridMemoryDecisionTracePayload, MemorySelectionSource, SEMANTIC_MEMORY_RETRIEVAL_ORDER, + TASK_ARTIFACT_CHUNK_RETRIEVAL_ORDER, SemanticMemoryRetrievalRequestInput, TRACE_KIND_CONTEXT_COMPILE, TraceEventRecord, isoformat_or_none, ) +from alicebot_api.artifacts import ( + TASK_ARTIFACT_CHUNK_RETRIEVAL_MATCHING_RULE, + TaskArtifactNotFoundError, + build_task_artifact_chunk_retrieval_scope, + infer_task_artifact_media_type, + resolve_artifact_chunk_retrieval_query_terms, + retrieve_matching_task_artifact_chunks, +) from alicebot_api.semantic_retrieval import validate_semantic_memory_retrieval_request from alicebot_api.store import ( ContinuityStore, @@ -33,6 +48,7 @@ ThreadRow, UserRow, ) +from alicebot_api.tasks import TaskNotFoundError SUMMARY_TRACE_EVENT_KIND = "context.summary" _UNBOUNDED_SEMANTIC_RETRIEVAL_LIMIT = 2_147_483_647 @@ -54,6 +70,13 @@ class CompiledMemorySection: decisions: list[CompilerDecision] +@dataclass(frozen=True, slots=True) +class CompiledArtifactChunkSection: + items: list[ContextPackArtifactChunk] + summary: ContextPackArtifactChunkSummary + decisions: list[CompilerDecision] + + @dataclass(slots=True) class HybridMemoryCandidate: memory: MemoryRow @@ -197,6 +220,59 @@ def _empty_hybrid_memory_summary() -> ContextPackHybridMemorySummary: } +def _empty_artifact_chunk_summary() -> ContextPackArtifactChunkSummary: + return { + "requested": False, + "scope": None, + "query": None, + "query_terms": [], + "matching_rule": TASK_ARTIFACT_CHUNK_RETRIEVAL_MATCHING_RULE, + "limit": 0, + "searched_artifact_count": 0, + "candidate_count": 0, + "included_count": 0, + "excluded_uningested_artifact_count": 0, + "excluded_limit_count": 0, + "order": list(TASK_ARTIFACT_CHUNK_RETRIEVAL_ORDER), + } + + +def _artifact_retrieval_decision_metadata( + *, + scope_kind: str, + task_id: UUID, + task_artifact_id: UUID, + relative_path: str, + media_type: str | None, + ingestion_status: str, + limit: int, + match: dict[str, object] | None = None, + sequence_no: int | None = None, + char_start: int | None = None, + char_end_exclusive: int | None = None, +) -> ArtifactRetrievalDecisionTracePayload: + payload: ArtifactRetrievalDecisionTracePayload = { + "scope_kind": scope_kind, # type: ignore[typeddict-item] + "task_id": str(task_id), + "task_artifact_id": str(task_artifact_id), + "relative_path": relative_path, + "media_type": media_type, + "ingestion_status": ingestion_status, # type: ignore[typeddict-item] + "limit": limit, + } + if 
match is not None: + payload["matched_query_terms"] = list(match["matched_query_terms"]) # type: ignore[index] + payload["matched_query_term_count"] = int(match["matched_query_term_count"]) # type: ignore[index] + payload["first_match_char_start"] = int(match["first_match_char_start"]) # type: ignore[index] + if sequence_no is not None: + payload["sequence_no"] = sequence_no + if char_start is not None: + payload["char_start"] = char_start + if char_end_exclusive is not None: + payload["char_end_exclusive"] = char_end_exclusive + return payload + + def _hybrid_memory_decision_metadata( *, embedding_config_id: UUID | None, @@ -492,6 +568,125 @@ def _compile_memory_section( ) +def _compile_artifact_chunk_section( + store: ContinuityStore, + *, + artifact_retrieval: CompileContextArtifactRetrievalInput | None, +) -> CompiledArtifactChunkSection: + if artifact_retrieval is None: + return CompiledArtifactChunkSection( + items=[], + summary=_empty_artifact_chunk_summary(), + decisions=[], + ) + + if isinstance(artifact_retrieval, CompileContextTaskScopedArtifactRetrievalInput): + task = store.get_task_optional(artifact_retrieval.task_id) + if task is None: + raise TaskNotFoundError(f"task {artifact_retrieval.task_id} was not found") + artifact_rows = store.list_task_artifacts_for_task(artifact_retrieval.task_id) + scope = build_task_artifact_chunk_retrieval_scope( + kind="task", + task_id=artifact_retrieval.task_id, + ) + scope_kind = "task" + else: + artifact_row = store.get_task_artifact_optional(artifact_retrieval.task_artifact_id) + if artifact_row is None: + raise TaskArtifactNotFoundError( + f"task artifact {artifact_retrieval.task_artifact_id} was not found" + ) + artifact_rows = [artifact_row] + scope = build_task_artifact_chunk_retrieval_scope( + kind="artifact", + task_id=artifact_row["task_id"], + task_artifact_id=artifact_row["id"], + ) + scope_kind = "artifact" + + query_terms = resolve_artifact_chunk_retrieval_query_terms(artifact_retrieval.query) + matched_items, searched_artifact_count = retrieve_matching_task_artifact_chunks( + store, + artifact_rows=artifact_rows, + query_terms=query_terms, + ) + included_items = matched_items[: artifact_retrieval.limit] + excluded_uningested_artifact_count = 0 + decisions: list[CompilerDecision] = [] + + for position, artifact_row in enumerate(artifact_rows, start=1): + if artifact_row["ingestion_status"] == "ingested": + continue + excluded_uningested_artifact_count += 1 + decisions.append( + CompilerDecision( + "excluded", + "task_artifact", + artifact_row["id"], + "artifact_not_ingested", + position, + metadata=_artifact_retrieval_decision_metadata( + scope_kind=scope_kind, + task_id=artifact_row["task_id"], + task_artifact_id=artifact_row["id"], + relative_path=artifact_row["relative_path"], + media_type=infer_task_artifact_media_type(artifact_row), + ingestion_status=artifact_row["ingestion_status"], + limit=artifact_retrieval.limit, + ), + ) + ) + + for position, item in enumerate(matched_items, start=1): + decision_kind = "included" if position <= artifact_retrieval.limit else "excluded" + decision_reason = ( + "within_artifact_chunk_limit" + if position <= artifact_retrieval.limit + else "artifact_chunk_limit_exceeded" + ) + decisions.append( + CompilerDecision( + decision_kind, + "artifact_chunk", + UUID(item["id"]), + decision_reason, + position, + metadata=_artifact_retrieval_decision_metadata( + scope_kind=scope_kind, + task_id=UUID(item["task_id"]), + task_artifact_id=UUID(item["task_artifact_id"]), + 
relative_path=item["relative_path"], + media_type=item["media_type"], + ingestion_status="ingested", + limit=artifact_retrieval.limit, + match=item["match"], + sequence_no=item["sequence_no"], + char_start=item["char_start"], + char_end_exclusive=item["char_end_exclusive"], + ), + ) + ) + + return CompiledArtifactChunkSection( + items=list(included_items), + summary={ + "requested": True, + "scope": scope, + "query": artifact_retrieval.query, + "query_terms": list(query_terms), + "matching_rule": TASK_ARTIFACT_CHUNK_RETRIEVAL_MATCHING_RULE, + "limit": artifact_retrieval.limit, + "searched_artifact_count": searched_artifact_count, + "candidate_count": len(matched_items), + "included_count": len(included_items), + "excluded_uningested_artifact_count": excluded_uningested_artifact_count, + "excluded_limit_count": max(len(matched_items) - len(included_items), 0), + "order": list(TASK_ARTIFACT_CHUNK_RETRIEVAL_ORDER), + }, + decisions=decisions, + ) + + def compile_continuity_context( *, user: UserRow, @@ -503,6 +698,7 @@ def compile_continuity_context( entity_edges: list[EntityEdgeRow], limits: ContextCompilerLimits, memory_section: CompiledMemorySection | None = None, + artifact_chunk_section: CompiledArtifactChunkSection | None = None, ) -> CompilerRunResult: latest_session_sequence: dict[UUID, int] = {} for event in events: @@ -595,6 +791,12 @@ def compile_continuity_context( limits=limits, ) decisions.extend(resolved_memory_section.decisions) + resolved_artifact_chunk_section = artifact_chunk_section or CompiledArtifactChunkSection( + items=[], + summary=_empty_artifact_chunk_summary(), + decisions=[], + ) + decisions.extend(resolved_artifact_chunk_section.decisions) ordered_entities = sorted(entities, key=_entity_sort_key) included_entities = ordered_entities[-limits.max_entities :] if limits.max_entities > 0 else [] included_entity_ids = {entity["id"] for entity in included_entities} @@ -725,6 +927,24 @@ def compile_continuity_context( "included_dual_source_memory_count": resolved_memory_section.summary[ "hybrid_retrieval" ]["included_dual_source_count"], + "artifact_retrieval_requested": resolved_artifact_chunk_section.summary["requested"], + "artifact_retrieval_scope_kind": ( + None + if resolved_artifact_chunk_section.summary["scope"] is None + else resolved_artifact_chunk_section.summary["scope"]["kind"] + ), + "artifact_chunk_candidate_count": resolved_artifact_chunk_section.summary[ + "candidate_count" + ], + "included_artifact_chunk_count": resolved_artifact_chunk_section.summary[ + "included_count" + ], + "excluded_artifact_chunk_limit_count": resolved_artifact_chunk_section.summary[ + "excluded_limit_count" + ], + "excluded_uningested_artifact_count": resolved_artifact_chunk_section.summary[ + "excluded_uningested_artifact_count" + ], "included_entity_count": len(included_entities), "excluded_entity_count": excluded_entity_limit_count, "excluded_entity_limit_count": excluded_entity_limit_count, @@ -756,6 +976,8 @@ def compile_continuity_context( "events": [_serialize_event(event) for event in included_events], "memories": list(resolved_memory_section.items), "memory_summary": resolved_memory_section.summary, + "artifact_chunks": list(resolved_artifact_chunk_section.items), + "artifact_chunk_summary": resolved_artifact_chunk_section.summary, "entities": [_serialize_entity(entity) for entity in included_entities], "entity_summary": { "candidate_count": len(ordered_entities), @@ -781,6 +1003,7 @@ def compile_and_persist_trace( thread_id: UUID, limits: ContextCompilerLimits, 
semantic_retrieval: CompileContextSemanticRetrievalInput | None = None, + artifact_retrieval: CompileContextArtifactRetrievalInput | None = None, ) -> CompiledTraceRun: user = store.get_user(user_id) thread = store.get_thread(thread_id) @@ -793,6 +1016,10 @@ def compile_and_persist_trace( limits=limits, semantic_retrieval=semantic_retrieval, ) + artifact_chunk_section = _compile_artifact_chunk_section( + store, + artifact_retrieval=artifact_retrieval, + ) entities = store.list_entities() ordered_entities = sorted(entities, key=_entity_sort_key) included_entities = ordered_entities[-limits.max_entities :] if limits.max_entities > 0 else [] @@ -807,6 +1034,7 @@ def compile_and_persist_trace( entity_edges=entity_edges, limits=limits, memory_section=memory_section, + artifact_chunk_section=artifact_chunk_section, ) trace = store.create_trace( user_id=user_id, diff --git a/apps/api/src/alicebot_api/contracts.py b/apps/api/src/alicebot_api/contracts.py index c86549c..86e7934 100644 --- a/apps/api/src/alicebot_api/contracts.py +++ b/apps/api/src/alicebot_api/contracts.py @@ -2,7 +2,7 @@ from dataclasses import dataclass, field from datetime import datetime -from typing import Literal, NotRequired, TypedDict +from typing import Literal, NotRequired, TypeAlias, TypedDict from uuid import UUID from alicebot_api.store import JsonObject, JsonValue @@ -89,6 +89,8 @@ MAX_MEMORY_REVIEW_LIMIT = 100 DEFAULT_SEMANTIC_MEMORY_RETRIEVAL_LIMIT = 5 MAX_SEMANTIC_MEMORY_RETRIEVAL_LIMIT = 50 +DEFAULT_ARTIFACT_CHUNK_RETRIEVAL_LIMIT = 5 +MAX_ARTIFACT_CHUNK_RETRIEVAL_LIMIT = 50 COMPILER_VERSION_V0 = "continuity_v0" PROMPT_ASSEMBLY_VERSION_V0 = "prompt_assembly_v0" RESPONSE_GENERATION_VERSION_V0 = "response_generation_v0" @@ -201,6 +203,42 @@ def as_payload(self) -> JsonObject: } +@dataclass(frozen=True, slots=True) +class CompileContextTaskScopedArtifactRetrievalInput: + task_id: UUID + query: str + limit: int = DEFAULT_ARTIFACT_CHUNK_RETRIEVAL_LIMIT + + def as_payload(self) -> JsonObject: + return { + "kind": "task", + "task_id": str(self.task_id), + "query": self.query, + "limit": self.limit, + } + + +@dataclass(frozen=True, slots=True) +class CompileContextArtifactScopedArtifactRetrievalInput: + task_artifact_id: UUID + query: str + limit: int = DEFAULT_ARTIFACT_CHUNK_RETRIEVAL_LIMIT + + def as_payload(self) -> JsonObject: + return { + "kind": "artifact", + "task_artifact_id": str(self.task_artifact_id), + "query": self.query, + "limit": self.limit, + } + + +CompileContextArtifactRetrievalInput: TypeAlias = ( + CompileContextTaskScopedArtifactRetrievalInput + | CompileContextArtifactScopedArtifactRetrievalInput +) + + @dataclass(frozen=True, slots=True) class TraceCreate: user_id: UUID @@ -316,6 +354,50 @@ class ContextPackHybridMemorySummary(TypedDict): semantic_order: list[str] +class ContextPackArtifactChunk(TypedDict): + id: str + task_id: str + task_artifact_id: str + relative_path: str + media_type: str + sequence_no: int + char_start: int + char_end_exclusive: int + text: str + match: "TaskArtifactChunkRetrievalMatch" + + +class ContextPackArtifactChunkSummary(TypedDict): + requested: bool + scope: TaskArtifactChunkRetrievalScope | None + query: str | None + query_terms: list[str] + matching_rule: str + limit: int + searched_artifact_count: int + candidate_count: int + included_count: int + excluded_uningested_artifact_count: int + excluded_limit_count: int + order: list[str] + + +class ArtifactRetrievalDecisionTracePayload(TypedDict): + scope_kind: TaskArtifactChunkRetrievalScopeKind + task_id: str + 
task_artifact_id: str + relative_path: str + media_type: str | None + ingestion_status: TaskArtifactIngestionStatus + limit: int + matched_query_terms: NotRequired[list[str]] + matched_query_term_count: NotRequired[int] + first_match_char_start: NotRequired[int] + sequence_no: NotRequired[int] + char_start: NotRequired[int] + char_end_exclusive: NotRequired[int] + + class ContextPackMemorySummary(TypedDict): candidate_count: int included_count: int @@ -399,6 +481,8 @@ class CompiledContextPack(TypedDict): events: list[ContextPackEvent] memories: list[ContextPackMemory] memory_summary: ContextPackMemorySummary + artifact_chunks: list[ContextPackArtifactChunk] + artifact_chunk_summary: ContextPackArtifactChunkSummary entities: list[ContextPackEntity] entity_summary: ContextPackEntitySummary entity_edges: list[ContextPackEntityEdge] diff --git a/apps/api/src/alicebot_api/main.py b/apps/api/src/alicebot_api/main.py index 982becb..bab17c2 100644 --- a/apps/api/src/alicebot_api/main.py +++ b/apps/api/src/alicebot_api/main.py @@ -1,11 +1,11 @@ from __future__ import annotations from datetime import datetime -from typing import Literal, TypedDict +from typing import Annotated, Literal, TypedDict from uuid import UUID from fastapi import FastAPI, Query from fastapi.encoders import jsonable_encoder -from pydantic import BaseModel, Field +from pydantic import BaseModel, ConfigDict, Field from fastapi.responses import JSONResponse from urllib.parse import urlsplit, urlunsplit @@ -15,9 +15,12 @@ ApprovalApproveInput, ApprovalRejectInput, ApprovalRequestCreateInput, + CompileContextArtifactScopedArtifactRetrievalInput, + CompileContextTaskScopedArtifactRetrievalInput, ConsentStatus, ConsentUpsertInput, CompileContextSemanticRetrievalInput, + DEFAULT_ARTIFACT_CHUNK_RETRIEVAL_LIMIT, DEFAULT_MAX_EVENTS, DEFAULT_MAX_ENTITY_EDGES, DEFAULT_MAX_ENTITIES, @@ -26,6 +29,7 @@ DEFAULT_MAX_SESSIONS, DEFAULT_SEMANTIC_MEMORY_RETRIEVAL_LIMIT, MAX_MEMORY_REVIEW_LIMIT, + MAX_ARTIFACT_CHUNK_RETRIEVAL_LIMIT, MAX_SEMANTIC_MEMORY_RETRIEVAL_LIMIT, ContextCompilerLimits, EmbeddingConfigStatus, @@ -242,6 +246,39 @@ class CompileContextSemanticRequest(BaseModel): ) +class CompileContextTaskScopedArtifactRetrievalRequest(BaseModel): + model_config = ConfigDict(extra="forbid") + + kind: Literal["task"] + task_id: UUID + query: str = Field(min_length=1, max_length=4000) + limit: int = Field( + default=DEFAULT_ARTIFACT_CHUNK_RETRIEVAL_LIMIT, + ge=1, + le=MAX_ARTIFACT_CHUNK_RETRIEVAL_LIMIT, + ) + + +class CompileContextArtifactScopedArtifactRetrievalRequest(BaseModel): + model_config = ConfigDict(extra="forbid") + + kind: Literal["artifact"] + task_artifact_id: UUID + query: str = Field(min_length=1, max_length=4000) + limit: int = Field( + default=DEFAULT_ARTIFACT_CHUNK_RETRIEVAL_LIMIT, + ge=1, + le=MAX_ARTIFACT_CHUNK_RETRIEVAL_LIMIT, + ) + + +CompileContextArtifactRetrievalRequest = Annotated[ + CompileContextTaskScopedArtifactRetrievalRequest + | CompileContextArtifactScopedArtifactRetrievalRequest, + Field(discriminator="kind"), +] + + class CompileContextRequest(BaseModel): user_id: UUID thread_id: UUID @@ -251,6 +288,7 @@ class CompileContextRequest(BaseModel): max_entities: int = Field(default=DEFAULT_MAX_ENTITIES, ge=0, le=50) max_entity_edges: int = Field(default=DEFAULT_MAX_ENTITY_EDGES, ge=0, le=100) semantic: CompileContextSemanticRequest | None = None + artifact_retrieval: CompileContextArtifactRetrievalRequest | None = None class GenerateResponseRequest(BaseModel): @@ -547,6 +585,22 @@ def healthcheck() -> JSONResponse: 
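+# The endpoint below accepts an optional discriminated artifact_retrieval object,
+# e.g. {"kind": "task", "task_id": "<uuid>", "query": "alpha beta", "limit": 2}
+# or {"kind": "artifact", "task_artifact_id": "<uuid>", ...}; both shapes are
+# validated by the request models above before reaching the compiler.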
@app.post("/v0/context/compile") def compile_context(request: CompileContextRequest) -> JSONResponse: settings = get_settings() + artifact_retrieval = None + if isinstance(request.artifact_retrieval, CompileContextTaskScopedArtifactRetrievalRequest): + artifact_retrieval = CompileContextTaskScopedArtifactRetrievalInput( + task_id=request.artifact_retrieval.task_id, + query=request.artifact_retrieval.query, + limit=request.artifact_retrieval.limit, + ) + elif isinstance( + request.artifact_retrieval, + CompileContextArtifactScopedArtifactRetrievalRequest, + ): + artifact_retrieval = CompileContextArtifactScopedArtifactRetrievalInput( + task_artifact_id=request.artifact_retrieval.task_artifact_id, + query=request.artifact_retrieval.query, + limit=request.artifact_retrieval.limit, + ) try: with user_connection(settings.database_url, request.user_id) as conn: @@ -570,9 +624,14 @@ def compile_context(request: CompileContextRequest) -> JSONResponse: limit=request.semantic.limit, ) ), + artifact_retrieval=artifact_retrieval, ) + except TaskArtifactChunkRetrievalValidationError as exc: + return JSONResponse(status_code=400, content={"detail": str(exc)}) except SemanticMemoryRetrievalValidationError as exc: return JSONResponse(status_code=400, content={"detail": str(exc)}) + except (TaskNotFoundError, TaskArtifactNotFoundError) as exc: + return JSONResponse(status_code=404, content={"detail": str(exc)}) except ContinuityStoreInvariantError as exc: return JSONResponse(status_code=404, content={"detail": str(exc)}) diff --git a/tests/integration/test_context_compile.py b/tests/integration/test_context_compile.py index f86bfe7..4b6913b 100644 --- a/tests/integration/test_context_compile.py +++ b/tests/integration/test_context_compile.py @@ -278,6 +278,120 @@ def seed_memory_embedding_for_user( ) +def seed_compile_artifact_scope( + database_url: str, + *, + user_id: UUID, + thread_id: UUID, +) -> dict[str, object]: + with user_connection(database_url, user_id) as conn: + store = ContinuityStore(conn) + tool = store.create_tool( + tool_key="artifact.search", + name="Artifact Search", + description="Compile artifact retrieval fixture", + version="2026-03-14", + metadata_version="tool_metadata_v0", + active=True, + tags=[], + action_hints=["retrieve"], + scope_hints=["task"], + domain_hints=[], + risk_hints=[], + metadata={}, + ) + task = store.create_task( + thread_id=thread_id, + tool_id=tool["id"], + status="approved", + request={"action": "retrieve"}, + tool={"tool_key": "artifact.search"}, + latest_approval_id=None, + latest_execution_id=None, + ) + workspace = store.create_task_workspace( + task_id=task["id"], + status="active", + local_path=f"/tmp/alicebot/{task['id']}", + ) + docs_artifact = store.create_task_artifact( + task_id=task["id"], + task_workspace_id=workspace["id"], + status="registered", + ingestion_status="ingested", + relative_path="docs/a.txt", + media_type_hint="text/plain", + ) + notes_artifact = store.create_task_artifact( + task_id=task["id"], + task_workspace_id=workspace["id"], + status="registered", + ingestion_status="ingested", + relative_path="notes/b.md", + media_type_hint="text/markdown", + ) + pending_artifact = store.create_task_artifact( + task_id=task["id"], + task_workspace_id=workspace["id"], + status="registered", + ingestion_status="pending", + relative_path="notes/hidden.txt", + media_type_hint="text/plain", + ) + weak_artifact = store.create_task_artifact( + task_id=task["id"], + task_workspace_id=workspace["id"], + status="registered", + 
ingestion_status="ingested", + relative_path="notes/c.txt", + media_type_hint="text/plain", + ) + docs_chunk = store.create_task_artifact_chunk( + task_artifact_id=docs_artifact["id"], + sequence_no=1, + char_start=0, + char_end_exclusive=14, + text="beta alpha doc", + ) + notes_chunk = store.create_task_artifact_chunk( + task_artifact_id=notes_artifact["id"], + sequence_no=1, + char_start=0, + char_end_exclusive=15, + text="alpha beta note", + ) + pending_chunk = store.create_task_artifact_chunk( + task_artifact_id=pending_artifact["id"], + sequence_no=1, + char_start=0, + char_end_exclusive=17, + text="alpha beta hidden", + ) + weak_chunk = store.create_task_artifact_chunk( + task_artifact_id=weak_artifact["id"], + sequence_no=1, + char_start=0, + char_end_exclusive=9, + text="beta only", + ) + + return { + "task_id": task["id"], + "artifact_ids": { + "docs": docs_artifact["id"], + "notes": notes_artifact["id"], + "pending": pending_artifact["id"], + "weak": weak_artifact["id"], + }, + "chunk_ids": { + "docs": docs_chunk["id"], + "notes": notes_chunk["id"], + "pending": pending_chunk["id"], + "weak": weak_chunk["id"], + }, + } + + def test_compile_context_endpoint_persists_trace_and_trace_events(migrated_database_urls, monkeypatch) -> None: seeded = seed_traceable_thread(migrated_database_urls["app"]) user_id = seeded["user_id"] @@ -798,6 +912,316 @@ def test_compile_context_semantic_validation_rejects_missing_config_dimension_mi ) +def test_compile_context_artifact_retrieval_integrates_chunks_traces_and_exclusion_rules( + migrated_database_urls, + monkeypatch, +) -> None: + seeded = seed_traceable_thread(migrated_database_urls["app"]) + artifact_scope = seed_compile_artifact_scope( + migrated_database_urls["app"], + user_id=seeded["user_id"], + thread_id=seeded["thread_id"], + ) + monkeypatch.setattr( + main_module, + "get_settings", + lambda: Settings(database_url=migrated_database_urls["app"]), + ) + + status_code, payload = invoke_compile_context( + { + "user_id": str(seeded["user_id"]), + "thread_id": str(seeded["thread_id"]), + "artifact_retrieval": { + "kind": "task", + "task_id": str(artifact_scope["task_id"]), + "query": "Alpha beta", + "limit": 2, + }, + } + ) + + assert status_code == 200 + assert payload["context_pack"]["artifact_chunks"] == [ + { + "id": str(artifact_scope["chunk_ids"]["docs"]), + "task_id": str(artifact_scope["task_id"]), + "task_artifact_id": str(artifact_scope["artifact_ids"]["docs"]), + "relative_path": "docs/a.txt", + "media_type": "text/plain", + "sequence_no": 1, + "char_start": 0, + "char_end_exclusive": 14, + "text": "beta alpha doc", + "match": { + "matched_query_terms": ["alpha", "beta"], + "matched_query_term_count": 2, + "first_match_char_start": 0, + }, + }, + { + "id": str(artifact_scope["chunk_ids"]["notes"]), + "task_id": str(artifact_scope["task_id"]), + "task_artifact_id": str(artifact_scope["artifact_ids"]["notes"]), + "relative_path": "notes/b.md", + "media_type": "text/markdown", + "sequence_no": 1, + "char_start": 0, + "char_end_exclusive": 15, + "text": "alpha beta note", + "match": { + "matched_query_terms": ["alpha", "beta"], + "matched_query_term_count": 2, + "first_match_char_start": 0, + }, + }, + ] + assert payload["context_pack"]["artifact_chunk_summary"] == { + "requested": True, + "scope": {"kind": "task", "task_id": str(artifact_scope["task_id"])}, + "query": "Alpha beta", + "query_terms": ["alpha", "beta"], + "matching_rule": "casefolded_unicode_word_overlap_unique_query_terms_v1", + "limit": 2, + "searched_artifact_count": 
3, + "candidate_count": 3, + "included_count": 2, + "excluded_uningested_artifact_count": 1, + "excluded_limit_count": 1, + "order": [ + "matched_query_term_count_desc", + "first_match_char_start_asc", + "relative_path_asc", + "sequence_no_asc", + "id_asc", + ], + } + assert payload["context_pack"]["memories"] + assert payload["context_pack"]["entities"] + + trace_id = UUID(payload["trace_id"]) + with user_connection(migrated_database_urls["app"], seeded["user_id"]) as conn: + trace_events = ContinuityStore(conn).list_trace_events(trace_id) + + assert any( + event["payload"]["reason"] == "within_artifact_chunk_limit" + and event["payload"]["entity_id"] == str(artifact_scope["chunk_ids"]["docs"]) + and event["payload"]["relative_path"] == "docs/a.txt" + and event["payload"]["matched_query_terms"] == ["alpha", "beta"] + for event in trace_events + if event["kind"] == "context.included" + ) + assert any( + event["payload"]["reason"] == "within_artifact_chunk_limit" + and event["payload"]["entity_id"] == str(artifact_scope["chunk_ids"]["notes"]) + and event["payload"]["relative_path"] == "notes/b.md" + for event in trace_events + if event["kind"] == "context.included" + ) + assert any( + event["payload"]["reason"] == "artifact_chunk_limit_exceeded" + and event["payload"]["entity_id"] == str(artifact_scope["chunk_ids"]["weak"]) + and event["payload"]["relative_path"] == "notes/c.txt" + for event in trace_events + if event["kind"] == "context.excluded" + ) + assert any( + event["payload"]["reason"] == "artifact_not_ingested" + and event["payload"]["entity_id"] == str(artifact_scope["artifact_ids"]["pending"]) + and event["payload"]["relative_path"] == "notes/hidden.txt" + and event["payload"]["ingestion_status"] == "pending" + for event in trace_events + if event["kind"] == "context.excluded" + ) + assert trace_events[-1]["payload"]["artifact_retrieval_requested"] is True + assert trace_events[-1]["payload"]["artifact_retrieval_scope_kind"] == "task" + assert trace_events[-1]["payload"]["artifact_chunk_candidate_count"] == 3 + assert trace_events[-1]["payload"]["included_artifact_chunk_count"] == 2 + assert trace_events[-1]["payload"]["excluded_artifact_chunk_limit_count"] == 1 + assert trace_events[-1]["payload"]["excluded_uningested_artifact_count"] == 1 + + +def test_compile_context_artifact_scoped_retrieval_returns_only_visible_artifact_chunks( + migrated_database_urls, + monkeypatch, +) -> None: + seeded = seed_traceable_thread(migrated_database_urls["app"]) + artifact_scope = seed_compile_artifact_scope( + migrated_database_urls["app"], + user_id=seeded["user_id"], + thread_id=seeded["thread_id"], + ) + monkeypatch.setattr( + main_module, + "get_settings", + lambda: Settings(database_url=migrated_database_urls["app"]), + ) + + status_code, payload = invoke_compile_context( + { + "user_id": str(seeded["user_id"]), + "thread_id": str(seeded["thread_id"]), + "artifact_retrieval": { + "kind": "artifact", + "task_artifact_id": str(artifact_scope["artifact_ids"]["notes"]), + "query": "Alpha beta", + "limit": 2, + }, + } + ) + + assert status_code == 200 + assert payload["context_pack"]["artifact_chunks"] == [ + { + "id": str(artifact_scope["chunk_ids"]["notes"]), + "task_id": str(artifact_scope["task_id"]), + "task_artifact_id": str(artifact_scope["artifact_ids"]["notes"]), + "relative_path": "notes/b.md", + "media_type": "text/markdown", + "sequence_no": 1, + "char_start": 0, + "char_end_exclusive": 15, + "text": "alpha beta note", + "match": { + "matched_query_terms": ["alpha", "beta"], + 
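+                # the query "Alpha beta" is casefolded and deduplicated into
+                # ["alpha", "beta"] before matching, per the matching_rule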
"matched_query_term_count": 2, + "first_match_char_start": 0, + }, + } + ] + assert payload["context_pack"]["artifact_chunk_summary"] == { + "requested": True, + "scope": { + "kind": "artifact", + "task_id": str(artifact_scope["task_id"]), + "task_artifact_id": str(artifact_scope["artifact_ids"]["notes"]), + }, + "query": "Alpha beta", + "query_terms": ["alpha", "beta"], + "matching_rule": "casefolded_unicode_word_overlap_unique_query_terms_v1", + "limit": 2, + "searched_artifact_count": 1, + "candidate_count": 1, + "included_count": 1, + "excluded_uningested_artifact_count": 0, + "excluded_limit_count": 0, + "order": [ + "matched_query_term_count_desc", + "first_match_char_start_asc", + "relative_path_asc", + "sequence_no_asc", + "id_asc", + ], + } + + trace_id = UUID(payload["trace_id"]) + with user_connection(migrated_database_urls["app"], seeded["user_id"]) as conn: + trace_events = ContinuityStore(conn).list_trace_events(trace_id) + + assert any( + event["payload"]["reason"] == "within_artifact_chunk_limit" + and event["payload"]["entity_id"] == str(artifact_scope["chunk_ids"]["notes"]) + and event["payload"]["scope_kind"] == "artifact" + and event["payload"]["task_artifact_id"] == str(artifact_scope["artifact_ids"]["notes"]) + for event in trace_events + if event["kind"] == "context.included" + ) + assert trace_events[-1]["payload"]["artifact_retrieval_requested"] is True + assert trace_events[-1]["payload"]["artifact_retrieval_scope_kind"] == "artifact" + assert trace_events[-1]["payload"]["artifact_chunk_candidate_count"] == 1 + assert trace_events[-1]["payload"]["included_artifact_chunk_count"] == 1 + assert trace_events[-1]["payload"]["excluded_artifact_chunk_limit_count"] == 0 + assert trace_events[-1]["payload"]["excluded_uningested_artifact_count"] == 0 + + +def test_compile_context_artifact_retrieval_validation_and_isolation( + migrated_database_urls, + monkeypatch, +) -> None: + owner = seed_traceable_thread(migrated_database_urls["app"]) + intruder = seed_traceable_thread( + migrated_database_urls["app"], + email="intruder@example.com", + display_name="Intruder", + ) + owner_artifact_scope = seed_compile_artifact_scope( + migrated_database_urls["app"], + user_id=owner["user_id"], + thread_id=owner["thread_id"], + ) + monkeypatch.setattr( + main_module, + "get_settings", + lambda: Settings(database_url=migrated_database_urls["app"]), + ) + + blank_query_status, blank_query_payload = invoke_compile_context( + { + "user_id": str(owner["user_id"]), + "thread_id": str(owner["thread_id"]), + "artifact_retrieval": { + "kind": "task", + "task_id": str(owner_artifact_scope["task_id"]), + "query": " ", + "limit": 2, + }, + } + ) + invalid_shape_status, invalid_shape_payload = invoke_compile_context( + { + "user_id": str(owner["user_id"]), + "thread_id": str(owner["thread_id"]), + "artifact_retrieval": { + "kind": "task", + "task_artifact_id": str(owner_artifact_scope["artifact_ids"]["docs"]), + "query": "alpha beta", + }, + } + ) + isolated_task_status, isolated_task_payload = invoke_compile_context( + { + "user_id": str(intruder["user_id"]), + "thread_id": str(intruder["thread_id"]), + "artifact_retrieval": { + "kind": "task", + "task_id": str(owner_artifact_scope["task_id"]), + "query": "alpha beta", + "limit": 2, + }, + } + ) + isolated_artifact_status, isolated_artifact_payload = invoke_compile_context( + { + "user_id": str(intruder["user_id"]), + "thread_id": str(intruder["thread_id"]), + "artifact_retrieval": { + "kind": "artifact", + "task_artifact_id": 
str(owner_artifact_scope["artifact_ids"]["docs"]), + "query": "alpha beta", + "limit": 2, + }, + } + ) + + assert blank_query_status == 400 + assert blank_query_payload == { + "detail": "artifact chunk retrieval query must include at least one word" + } + assert invalid_shape_status == 422 + assert "task_id" in json.dumps(invalid_shape_payload) + assert isolated_task_status == 404 + assert isolated_task_payload == { + "detail": f"task {owner_artifact_scope['task_id']} was not found" + } + assert isolated_artifact_status == 404 + assert isolated_artifact_payload == { + "detail": ( + "task artifact " + f"{owner_artifact_scope['artifact_ids']['docs']} was not found" + ) + } + + def test_traces_and_trace_events_respect_per_user_isolation(migrated_database_urls, monkeypatch) -> None: seeded = seed_traceable_thread(migrated_database_urls["app"]) owner_id = seeded["user_id"] diff --git a/tests/unit/test_compiler.py b/tests/unit/test_compiler.py index c221707..e0c2cae 100644 --- a/tests/unit/test_compiler.py +++ b/tests/unit/test_compiler.py @@ -5,10 +5,15 @@ from alicebot_api.compiler import ( SUMMARY_TRACE_EVENT_KIND, + _compile_artifact_chunk_section, _compile_memory_section, compile_continuity_context, ) -from alicebot_api.contracts import CompileContextSemanticRetrievalInput, ContextCompilerLimits +from alicebot_api.contracts import ( + CompileContextSemanticRetrievalInput, + CompileContextTaskScopedArtifactRetrievalInput, + ContextCompilerLimits, +) def test_compile_continuity_context_is_deterministic_and_stably_ordered() -> None: @@ -287,6 +292,27 @@ def test_compile_continuity_context_is_deterministic_and_stably_ordered() -> Non "semantic_order": ["score_desc", "created_at_asc", "id_asc"], }, } + assert first_run.context_pack["artifact_chunks"] == [] + assert first_run.context_pack["artifact_chunk_summary"] == { + "requested": False, + "scope": None, + "query": None, + "query_terms": [], + "matching_rule": "casefolded_unicode_word_overlap_unique_query_terms_v1", + "limit": 0, + "searched_artifact_count": 0, + "candidate_count": 0, + "included_count": 0, + "excluded_uningested_artifact_count": 0, + "excluded_limit_count": 0, + "order": [ + "matched_query_term_count_desc", + "first_match_char_start_asc", + "relative_path_asc", + "sequence_no_asc", + "id_asc", + ], + } assert first_run.context_pack["entity_summary"] == { "candidate_count": 3, "included_count": 2, @@ -596,6 +622,27 @@ def test_compile_continuity_context_records_included_and_excluded_reasons() -> N "semantic_order": ["score_desc", "created_at_asc", "id_asc"], }, } + assert compiler_run.context_pack["artifact_chunks"] == [] + assert compiler_run.context_pack["artifact_chunk_summary"] == { + "requested": False, + "scope": None, + "query": None, + "query_terms": [], + "matching_rule": "casefolded_unicode_word_overlap_unique_query_terms_v1", + "limit": 0, + "searched_artifact_count": 0, + "candidate_count": 0, + "included_count": 0, + "excluded_uningested_artifact_count": 0, + "excluded_limit_count": 0, + "order": [ + "matched_query_term_count_desc", + "first_match_char_start_asc", + "relative_path_asc", + "sequence_no_asc", + "id_asc", + ], + } assert compiler_run.context_pack["entities"] == [ { "id": str(kept_entity_id), @@ -629,6 +676,11 @@ def test_compile_continuity_context_records_included_and_excluded_reasons() -> N assert compiler_run.trace_events[-1].payload["hybrid_memory_candidate_count"] == 2 assert compiler_run.trace_events[-1].payload["hybrid_memory_merged_candidate_count"] == 1 assert 
compiler_run.trace_events[-1].payload["hybrid_memory_deduplicated_count"] == 0 + assert compiler_run.trace_events[-1].payload["artifact_retrieval_requested"] is False + assert compiler_run.trace_events[-1].payload["artifact_chunk_candidate_count"] == 0 + assert compiler_run.trace_events[-1].payload["included_artifact_chunk_count"] == 0 + assert compiler_run.trace_events[-1].payload["excluded_artifact_chunk_limit_count"] == 0 + assert compiler_run.trace_events[-1].payload["excluded_uningested_artifact_count"] == 0 class SemanticCompileStoreStub: @@ -690,6 +742,186 @@ def list_memory_embeddings_for_config(self, embedding_config_id): ] +class ArtifactCompileStoreStub: + def __init__(self) -> None: + self.base_time = datetime(2026, 3, 14, 12, 0, tzinfo=UTC) + self.task_id = uuid4() + self.artifact_ids = [uuid4(), uuid4(), uuid4(), uuid4()] + self.chunk_ids = [uuid4(), uuid4(), uuid4()] + + def get_task_optional(self, task_id): + if task_id != self.task_id: + return None + return {"id": self.task_id} + + def list_task_artifacts_for_task(self, task_id): + assert task_id == self.task_id + return [ + { + "id": self.artifact_ids[0], + "task_id": self.task_id, + "task_workspace_id": uuid4(), + "status": "registered", + "ingestion_status": "ingested", + "relative_path": "docs/a.txt", + "media_type_hint": "text/plain", + "created_at": self.base_time, + "updated_at": self.base_time, + }, + { + "id": self.artifact_ids[1], + "task_id": self.task_id, + "task_workspace_id": uuid4(), + "status": "registered", + "ingestion_status": "ingested", + "relative_path": "notes/b.md", + "media_type_hint": "text/markdown", + "created_at": self.base_time + timedelta(minutes=1), + "updated_at": self.base_time + timedelta(minutes=1), + }, + { + "id": self.artifact_ids[2], + "task_id": self.task_id, + "task_workspace_id": uuid4(), + "status": "registered", + "ingestion_status": "pending", + "relative_path": "notes/hidden.txt", + "media_type_hint": "text/plain", + "created_at": self.base_time + timedelta(minutes=2), + "updated_at": self.base_time + timedelta(minutes=2), + }, + { + "id": self.artifact_ids[3], + "task_id": self.task_id, + "task_workspace_id": uuid4(), + "status": "registered", + "ingestion_status": "ingested", + "relative_path": "notes/c.txt", + "media_type_hint": "text/plain", + "created_at": self.base_time + timedelta(minutes=3), + "updated_at": self.base_time + timedelta(minutes=3), + }, + ] + + def list_task_artifact_chunks(self, task_artifact_id): + if task_artifact_id == self.artifact_ids[0]: + return [ + { + "id": self.chunk_ids[0], + "task_artifact_id": task_artifact_id, + "sequence_no": 1, + "char_start": 0, + "char_end_exclusive": 14, + "text": "beta alpha doc", + "created_at": self.base_time, + "updated_at": self.base_time, + } + ] + if task_artifact_id == self.artifact_ids[1]: + return [ + { + "id": self.chunk_ids[1], + "task_artifact_id": task_artifact_id, + "sequence_no": 1, + "char_start": 0, + "char_end_exclusive": 15, + "text": "alpha beta note", + "created_at": self.base_time, + "updated_at": self.base_time, + } + ] + if task_artifact_id == self.artifact_ids[3]: + return [ + { + "id": self.chunk_ids[2], + "task_artifact_id": task_artifact_id, + "sequence_no": 1, + "char_start": 0, + "char_end_exclusive": 9, + "text": "beta only", + "created_at": self.base_time, + "updated_at": self.base_time, + } + ] + return [] + + +def test_compile_artifact_chunk_section_orders_limits_and_excludes_non_ingested() -> None: + store = ArtifactCompileStoreStub() + + artifact_section = 
_compile_artifact_chunk_section( + store, # type: ignore[arg-type] + artifact_retrieval=CompileContextTaskScopedArtifactRetrievalInput( + task_id=store.task_id, + query="Alpha beta", + limit=2, + ), + ) + + assert artifact_section.items == [ + { + "id": str(store.chunk_ids[0]), + "task_id": str(store.task_id), + "task_artifact_id": str(store.artifact_ids[0]), + "relative_path": "docs/a.txt", + "media_type": "text/plain", + "sequence_no": 1, + "char_start": 0, + "char_end_exclusive": 14, + "text": "beta alpha doc", + "match": { + "matched_query_terms": ["alpha", "beta"], + "matched_query_term_count": 2, + "first_match_char_start": 0, + }, + }, + { + "id": str(store.chunk_ids[1]), + "task_id": str(store.task_id), + "task_artifact_id": str(store.artifact_ids[1]), + "relative_path": "notes/b.md", + "media_type": "text/markdown", + "sequence_no": 1, + "char_start": 0, + "char_end_exclusive": 15, + "text": "alpha beta note", + "match": { + "matched_query_terms": ["alpha", "beta"], + "matched_query_term_count": 2, + "first_match_char_start": 0, + }, + }, + ] + assert artifact_section.summary == { + "requested": True, + "scope": {"kind": "task", "task_id": str(store.task_id)}, + "query": "Alpha beta", + "query_terms": ["alpha", "beta"], + "matching_rule": "casefolded_unicode_word_overlap_unique_query_terms_v1", + "limit": 2, + "searched_artifact_count": 3, + "candidate_count": 3, + "included_count": 2, + "excluded_uningested_artifact_count": 1, + "excluded_limit_count": 1, + "order": [ + "matched_query_term_count_desc", + "first_match_char_start_asc", + "relative_path_asc", + "sequence_no_asc", + "id_asc", + ], + } + assert [decision.reason for decision in artifact_section.decisions] == [ + "artifact_not_ingested", + "within_artifact_chunk_limit", + "within_artifact_chunk_limit", + "artifact_chunk_limit_exceeded", + ] + assert artifact_section.decisions[0].metadata["relative_path"] == "notes/hidden.txt" + assert artifact_section.decisions[-1].metadata["relative_path"] == "notes/c.txt" + + def test_compile_memory_section_orders_limits_and_excludes_deleted() -> None: store = SemanticCompileStoreStub() deleted_memory = { diff --git a/tests/unit/test_main.py b/tests/unit/test_main.py index 446f108..7afeb2c 100644 --- a/tests/unit/test_main.py +++ b/tests/unit/test_main.py @@ -4,6 +4,7 @@ from contextlib import contextmanager from uuid import uuid4 +import pytest import apps.api.src.alicebot_api.main as main_module from apps.api.src.alicebot_api.config import Settings from alicebot_api.compiler import CompiledTraceRun @@ -180,12 +181,21 @@ def fake_user_connection(database_url: str, current_user_id): captured["current_user_id"] = current_user_id yield object() - def fake_compile_and_persist_trace(store, *, user_id, thread_id, limits, semantic_retrieval): + def fake_compile_and_persist_trace( + store, + *, + user_id, + thread_id, + limits, + semantic_retrieval, + artifact_retrieval, + ): captured["store_type"] = type(store).__name__ captured["user_id"] = user_id captured["thread_id"] = thread_id captured["limits"] = limits captured["semantic_retrieval"] = semantic_retrieval + captured["artifact_retrieval"] = artifact_retrieval return CompiledTraceRun( trace_id="trace-123", trace_event_count=5, @@ -248,6 +258,27 @@ def fake_compile_and_persist_trace(store, *, user_id, thread_id, limits, semanti "semantic_order": ["score_desc", "created_at_asc", "id_asc"], }, }, + "artifact_chunks": [], + "artifact_chunk_summary": { + "requested": False, + "scope": None, + "query": None, + "query_terms": [], + 
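+                # the summary keeps its full stable shape even when retrieval
+                # was not requested, so tests can compare the dict wholesale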
"matching_rule": "casefolded_unicode_word_overlap_unique_query_terms_v1", + "limit": 0, + "searched_artifact_count": 0, + "candidate_count": 0, + "included_count": 0, + "excluded_uningested_artifact_count": 0, + "excluded_limit_count": 0, + "order": [ + "matched_query_term_count_desc", + "first_match_char_start_asc", + "relative_path_asc", + "sequence_no_asc", + "id_asc", + ], + }, "entities": [ { "id": "entity-123", @@ -362,6 +393,27 @@ def fake_compile_and_persist_trace(store, *, user_id, thread_id, limits, semanti "semantic_order": ["score_desc", "created_at_asc", "id_asc"], }, }, + "artifact_chunks": [], + "artifact_chunk_summary": { + "requested": False, + "scope": None, + "query": None, + "query_terms": [], + "matching_rule": "casefolded_unicode_word_overlap_unique_query_terms_v1", + "limit": 0, + "searched_artifact_count": 0, + "candidate_count": 0, + "included_count": 0, + "excluded_uningested_artifact_count": 0, + "excluded_limit_count": 0, + "order": [ + "matched_query_term_count_desc", + "first_match_char_start_asc", + "relative_path_asc", + "sequence_no_asc", + "id_asc", + ], + }, "entities": [ { "id": "entity-123", @@ -406,6 +458,7 @@ def fake_compile_and_persist_trace(store, *, user_id, thread_id, limits, semanti assert captured["limits"].max_entities == 2 assert captured["limits"].max_entity_edges == 6 assert captured["semantic_retrieval"] is None + assert captured["artifact_retrieval"] is None def test_compile_context_returns_not_found_when_scope_row_is_missing(monkeypatch) -> None: @@ -433,7 +486,9 @@ def fake_user_connection(_database_url: str, _current_user_id): } -def test_compile_context_routes_semantic_inputs_and_validation_errors(monkeypatch) -> None: +def test_compile_context_routes_semantic_and_artifact_inputs_and_validation_errors( + monkeypatch, +) -> None: user_id = uuid4() thread_id = uuid4() config_id = uuid4() @@ -446,12 +501,21 @@ def fake_user_connection(database_url: str, current_user_id): captured["current_user_id"] = current_user_id yield object() - def fake_compile_and_persist_trace(store, *, user_id, thread_id, limits, semantic_retrieval): + def fake_compile_and_persist_trace( + store, + *, + user_id, + thread_id, + limits, + semantic_retrieval, + artifact_retrieval, + ): captured["store_type"] = type(store).__name__ captured["user_id"] = user_id captured["thread_id"] = thread_id captured["limits"] = limits captured["semantic_retrieval"] = semantic_retrieval + captured["artifact_retrieval"] = artifact_retrieval return CompiledTraceRun( trace_id="trace-semantic", trace_event_count=7, @@ -517,6 +581,44 @@ def fake_compile_and_persist_trace(store, *, user_id, thread_id, limits, semanti "semantic_order": ["score_desc", "created_at_asc", "id_asc"], }, }, + "artifact_chunks": [ + { + "id": "chunk-123", + "task_id": "task-123", + "task_artifact_id": "artifact-123", + "relative_path": "docs/spec.txt", + "media_type": "text/plain", + "sequence_no": 1, + "char_start": 0, + "char_end_exclusive": 16, + "text": "alpha beta spec", + "match": { + "matched_query_terms": ["alpha", "beta"], + "matched_query_term_count": 2, + "first_match_char_start": 0, + }, + } + ], + "artifact_chunk_summary": { + "requested": True, + "scope": {"kind": "task", "task_id": "task-123"}, + "query": "alpha beta", + "query_terms": ["alpha", "beta"], + "matching_rule": "casefolded_unicode_word_overlap_unique_query_terms_v1", + "limit": 2, + "searched_artifact_count": 1, + "candidate_count": 1, + "included_count": 1, + "excluded_uningested_artifact_count": 0, + "excluded_limit_count": 0, + 
"order": [ + "matched_query_term_count_desc", + "first_match_char_start_asc", + "relative_path_asc", + "sequence_no_asc", + "id_asc", + ], + }, "entities": [], "entity_summary": { "candidate_count": 0, @@ -546,6 +648,12 @@ def fake_compile_and_persist_trace(store, *, user_id, thread_id, limits, semanti query_vector=[0.1, 0.2, 0.3], limit=2, ), + artifact_retrieval=main_module.CompileContextTaskScopedArtifactRetrievalRequest( + kind="task", + task_id=uuid4(), + query="alpha beta", + limit=2, + ), ) ) @@ -572,6 +680,9 @@ def fake_compile_and_persist_trace(store, *, user_id, thread_id, limits, semanti assert captured["semantic_retrieval"].embedding_config_id == config_id assert captured["semantic_retrieval"].query_vector == (0.1, 0.2, 0.3) assert captured["semantic_retrieval"].limit == 2 + assert captured["artifact_retrieval"].task_id is not None + assert captured["artifact_retrieval"].query == "alpha beta" + assert captured["artifact_retrieval"].limit == 2 monkeypatch.setattr( main_module, @@ -601,6 +712,21 @@ def fake_compile_and_persist_trace(store, *, user_id, thread_id, limits, semanti } +def test_compile_context_request_rejects_invalid_artifact_scope_shape() -> None: + with pytest.raises(Exception) as exc_info: + main_module.CompileContextRequest( + user_id=uuid4(), + thread_id=uuid4(), + artifact_retrieval={ + "kind": "task", + "task_artifact_id": str(uuid4()), + "query": "alpha beta", + }, + ) + + assert "task_id" in str(exc_info.value) + + def test_generate_assistant_response_returns_assistant_and_trace_payload(monkeypatch) -> None: user_id = uuid4() thread_id = uuid4()