Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
137 changes: 137 additions & 0 deletions alembic/versions/036_partial_index_user_agent_slug_active.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
"""OSS port — slug uniqueness scoped to active rows via partial index.

Surface: ``agents`` table — replace the full UNIQUE constraint
``unique_user_agent_slug (user_id, slug)`` with a partial UNIQUE INDEX
of the same name, scoped to ``WHERE deleted_at IS NULL``.

## Provenance (parity port from private cueapi)

Cherry-pick of private cueapi's migration 080
(``080_partial_index_user_agent_slug_active.py``), shipped in private
cueapi PR #921 (merge_commit ``b770983e``, prod-verified via G14 5/5
PASS on api.cueapi.ai @ commit 454aac3). Renumbered 080 → 036 to
chain off public's current alembic head (035_agent_live_sessions_ipc_attachment).

Code shape verbatim; downgrade-safety-rail verbatim. The only
deviation is the migration number (public OSS-track has its own
alembic ordering).

## Why

Jingim's cue.dock.svc (which vendors public cueapi-core) hit this
empirically on 2026-05-20: soft-deleting an agent on the cueapi side
leaves the slug locked. ``POST /v1/agents`` for the same slug returns 409
``slug_taken``; ``GET /v1/agents/{slug}`` returns 404 (default
``include_deleted=false``). Net: there is no way to recreate a previously
soft-deleted slug, short of ``?include_deleted=true`` plus reanimate
semantics, which don't exist.

Pre-fix workaround: the ``_build_archived_slug`` helper from migration 029
(private's 071 equivalent in OSS order, if it ports) renames soft-deleted
rows to ``<slug>-archived-<8hex>`` to free the original slug for re-use.
That works in cueapi-internal code paths, but cross-language consumers
(Dock / @trydock/mcp / presence-runtime) hit the constraint directly.

Postgres native fix: partial UNIQUE INDEX. The index enforces
uniqueness ONLY across rows with ``deleted_at IS NULL`` (active).
Soft-deleted rows keep their original slug; uniqueness no longer
applies once deleted.

## What this migration does

1. Drop the existing full UNIQUE constraint ``unique_user_agent_slug``.
2. Create a UNIQUE INDEX of the same name with
``WHERE deleted_at IS NULL``.

The index name is preserved so the existing IntegrityError text match
in ``app/services/agent_service.py`` (``if "unique_user_agent_slug"
in str(e.orig)``) continues to work without a code change.

## Effect on existing data

NONE. Existing soft-deleted rows stay as-is. They're all
``deleted_at IS NOT NULL`` → excluded from the partial index → no
uniqueness check applies. Future soft-deletes don't need a rename.

## Backward compatibility

The companion route change at ``app/services/agent_service.py``
(this PR's other half) preserves the existing 409 envelope shape +
adds an ``existing_uuid`` field. Existing clients that ignore
``existing_uuid`` continue to work; new clients (Jingim's Dock,
@trydock/mcp, etc.) read it to skip a GET round-trip.

## Downgrade safety

Refuses downgrade if any ``(user_id, slug)`` pair has BOTH an active
AND a soft-deleted row (or multiple soft-deleted rows with the same
slug). Recreating the full UNIQUE constraint would fail at execute
time, leaving the schema in a broken state. Force a deliberate
decision by failing the migration.

## OSS track-lag note

Private cueapi additionally raised ``agents.slug`` from ``VARCHAR(64)``
to ``VARCHAR(128)`` in migration 079 (cmpdl2bam PR #42 Substrate). That
column-width raise is NOT ported here; OSS keeps ``VARCHAR(64)``. Self-
hosters whose slugs fit in 64 chars (the common case) are unaffected.
If OSS users need the 128-char ceiling for labeled-Live composite
slugs, that's a separate port.
"""
from alembic import op
import sqlalchemy as sa


revision = "036"
down_revision = "035"
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Swap the full UNIQUE constraint for a partial UNIQUE INDEX.

    Drops the ``unique_user_agent_slug`` constraint on ``agents`` and
    creates a unique index with the same name over ``(user_id, slug)``,
    restricted to rows where ``deleted_at IS NULL``. The name is reused
    on purpose so code that matches on the constraint name in the
    database error text keeps working.
    """
    index_name = "unique_user_agent_slug"
    table_name = "agents"

    # Step 1: remove the table-wide uniqueness rule.
    op.drop_constraint(index_name, table_name, type_="unique")

    # Step 2: re-add uniqueness for active rows only. Soft-deleted rows
    # fall outside the predicate, so they may share a slug with an
    # active row or with one another.
    active_rows_only = sa.text("deleted_at IS NULL")
    op.create_index(
        index_name,
        table_name,
        ["user_id", "slug"],
        unique=True,
        postgresql_where=active_rows_only,
    )


def downgrade() -> None:
    """Restore the full UNIQUE constraint, refusing if data would violate it.

    Runs a PL/pgSQL guard first: it counts ``(user_id, slug)`` groups in
    ``agents`` that contain more than one row (active or soft-deleted)
    and raises if any exist, since the full constraint could not be
    recreated over such data. Only when the guard passes is the partial
    index dropped and the table-wide constraint put back.
    """
    constraint_name = "unique_user_agent_slug"
    table = "agents"

    # Guard block. Kept verbatim; the single ``%`` is the PL/pgSQL RAISE
    # placeholder that receives ``dup_count``.
    duplicate_guard_sql = """
DO $$
DECLARE dup_count INT;
BEGIN
SELECT COUNT(*) INTO dup_count FROM (
SELECT user_id, slug
FROM agents
GROUP BY user_id, slug
HAVING COUNT(*) > 1
) sub;
IF dup_count > 0 THEN
RAISE EXCEPTION 'cannot downgrade migration 036: % (user_id, slug) pairs would violate the recreated UNIQUE constraint. Either delete the duplicates or hard-delete the soft-deleted rows before downgrading.', dup_count;
END IF;
END $$;
"""
    op.execute(duplicate_guard_sql)

    # Guard passed: swap the partial index back for the full constraint.
    op.drop_index(constraint_name, table_name=table)
    op.create_unique_constraint(constraint_name, table, ["user_id", "slug"])
19 changes: 17 additions & 2 deletions app/models/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@
Index,
String,
Text,
UniqueConstraint,
func,
text,
)
from sqlalchemy.dialects.postgresql import JSONB, UUID

Expand Down Expand Up @@ -91,7 +91,22 @@ class Agent(Base):
)

__table_args__ = (
UniqueConstraint("user_id", "slug", name="unique_user_agent_slug"),
# Partial UNIQUE INDEX scoped to active (non-soft-deleted) rows.
# Replaces the prior full UNIQUE constraint per migration 036
# (OSS port of private cueapi PR #921 / migration 080). Soft-
# deleted rows can keep their original slug without blocking
# recreate. Match the migration's
# `postgresql_where=sa.text("deleted_at IS NULL")` exactly so
# the test-DB schema (built via Base.metadata.create_all) and
# the prod schema (built via alembic) declare identical
# constraint shape.
Index(
"unique_user_agent_slug",
"user_id",
"slug",
unique=True,
postgresql_where=text("deleted_at IS NULL"),
),
CheckConstraint(
"status IN ('online', 'offline', 'away')",
name="valid_agent_status",
Expand Down
69 changes: 64 additions & 5 deletions app/services/agent_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,59 @@ def _http_error(status: int, code: str, message: str) -> HTTPException:
)


async def _lookup_existing_live_agent_uuid(
    db: AsyncSession, user_id, slug: str
) -> Optional[str]:
    """Return the id of the live agent owning ``slug`` for ``user_id``.

    "Live" means ``deleted_at`` is NULL. The query selects ``Agent.id``
    (described by the original author as the opaque ``agt_<id>`` form)
    for the matching row and yields ``None`` when there is no such row,
    for example when the conflicting row disappeared between the failed
    INSERT and this lookup.

    Kept as a standalone helper so the lookup can be unit-tested by
    calling it directly instead of only through HTTP-routed tests
    (parity port from private cueapi PR #921).
    """
    live_match = (
        select(Agent.id)
        .where(
            Agent.user_id == user_id,
            Agent.slug == slug,
            Agent.deleted_at.is_(None),
        )
        .limit(1)
    )
    rows = await db.execute(live_match)
    return rows.scalar_one_or_none()


def _http_error_slug_taken(
    final_slug: str, existing_uuid: Optional[str]
) -> HTTPException:
    """Build the 409 ``slug_taken`` error that carries ``existing_uuid``.

    Parity port of private cueapi PR #921: when creating an agent
    collides with an existing live (not soft-deleted) agent on
    ``unique_user_agent_slug``, the envelope includes the conflicting
    agent's id so the caller can address it directly without an extra
    GET.

    ``existing_uuid`` may be ``None``. The follow-up lookup can find
    nothing if the conflicting row vanished after the INSERT failed, and
    the helper also tolerates being reached defensively without a match.
    Callers seeing ``null`` fall back to GET-then-PATCH.

    Returns:
        An ``HTTPException`` with status 409 whose ``detail`` is
        ``{"error": {code, message, status, existing_uuid}}``.
    """
    conflict_status = 409
    error_body = {
        "code": "slug_taken",
        "message": f"slug '{final_slug}' already in use for this user",
        "status": conflict_status,
        "existing_uuid": existing_uuid,
    }
    return HTTPException(
        status_code=conflict_status,
        detail={"error": error_body},
    )


def _looks_like_opaque_id(addr: str) -> bool:
return (
len(addr) == OPAQUE_ID_LENGTH
Expand Down Expand Up @@ -185,13 +238,19 @@ async def create_agent(
except IntegrityError as e:
await db.rollback()
# Most likely cause: concurrent slug collision against the
# ``unique_user_agent_slug`` constraint.
# partial UNIQUE INDEX ``unique_user_agent_slug`` (migration
# 036, parity port of private cueapi PR #921). Post-port: the
# index is scoped to ``deleted_at IS NULL`` rows only, so a
# soft-deleted-slug-recreate path does NOT hit this
# IntegrityError. Only LIVE-duplicate collisions trip the
# constraint.
if "unique_user_agent_slug" in str(e.orig):
raise _http_error(
409,
"slug_taken",
f"slug '{final_slug}' already in use for this user",
# Look up the conflicting LIVE agent's opaque id via the
# pure helper (unit-testable; defeats ASGI trace gap).
existing_uuid = await _lookup_existing_live_agent_uuid(
db, user.id, final_slug
)
raise _http_error_slug_taken(final_slug, existing_uuid)
raise

await db.refresh(agent)
Expand Down
29 changes: 18 additions & 11 deletions parity-manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@
{
"feature": "live_fallback_mode (sender chooses queue vs bg on Live-miss)",
"private_origin": "alembic 067_messages_queue_effective_mode.py (queue state) + service-layer resolver",
"note": "Optional; ship Surface 6 minimal first (live + bg + inbox + webhook + auto resolver). live_fallback_mode is a sender-side preference for handling claim-miss Dock's Phase 1 may not need it. Defer to Step 4 follow-up OR fold into Step 2 if substrate work is small."
"note": "Optional; ship Surface 6 minimal first (live + bg + inbox + webhook + auto resolver). live_fallback_mode is a sender-side preference for handling claim-miss \u2014 Dock's Phase 1 may not need it. Defer to Step 4 follow-up OR fold into Step 2 if substrate work is small."
},
{
"feature": "cross-org delivery_mode gating (cross_org_cue_task_enabled etc.)",
Expand All @@ -182,20 +182,20 @@
}
],
"ports_already_landed_in_oss_for_surface_6_foundations": [
"alembic 028 + app/models/event.py + app/services/events_service.py event-emit primitive (PR #71, ports private #731 PR-1b)",
"alembic 029 messaging emission columns (PR #72, ports private #775 PR-2a)",
"alembic 030 message send_at (PR #77, ports private #623)",
"alembic 031 agents.last_seen_at + roster endpoint (PR #80, ports private #630)",
"alembic 033 subscriptions.inline_body (PR #84, ports private #791)",
"alembic 034 subscriptions.last_acked_event_id (PR #85, ports private #793)",
"alembic 026 agent_live_sessions schema (PR #67)",
"alembic 027 executions live-claim attestation (PR #70)",
"alembic 035 agent_live_sessions IPC attachment (PR #91)"
"alembic 028 + app/models/event.py + app/services/events_service.py \u2014 event-emit primitive (PR #71, ports private #731 PR-1b)",
"alembic 029 \u2014 messaging emission columns (PR #72, ports private #775 PR-2a)",
"alembic 030 \u2014 message send_at (PR #77, ports private #623)",
"alembic 031 \u2014 agents.last_seen_at + roster endpoint (PR #80, ports private #630)",
"alembic 033 \u2014 subscriptions.inline_body (PR #84, ports private #791)",
"alembic 034 \u2014 subscriptions.last_acked_event_id (PR #85, ports private #793)",
"alembic 026 \u2014 agent_live_sessions schema (PR #67)",
"alembic 027 \u2014 executions live-claim attestation (PR #70)",
"alembic 035 \u2014 agent_live_sessions IPC attachment (PR #91)"
],
"forward_references_in_oss_pending_surface_6": [
{
"path": "alembic/versions/035_agent_live_sessions_ipc_attachment.py",
"note": "Docstring mentions `delivery_mode_requested='ipc'` (line 42 'returns immediately with delivery_mode_requested='ipc'') as a forward-reference to behavior the (future) async fire-accept dispatcher will exhibit. The column itself is NOT added by 035 (which only touches agent_live_sessions); it lands when Surface 6's substrate migration ports. NOT a bug the ASYNC dispatcher path won't fire until Surface 6 is complete (no message-create path produces delivery_mode_requested='ipc' until then). Verified 2026-05-19 by cueapi-secondary during Step 1 audit."
"note": "Docstring mentions `delivery_mode_requested='ipc'` (line 42 'returns immediately with delivery_mode_requested='ipc'') as a forward-reference to behavior the (future) async fire-accept dispatcher will exhibit. The column itself is NOT added by 035 (which only touches agent_live_sessions); it lands when Surface 6's substrate migration ports. NOT a bug \u2014 the ASYNC dispatcher path won't fire until Surface 6 is complete (no message-create path produces delivery_mode_requested='ipc' until then). Verified 2026-05-19 by cueapi-secondary during Step 1 audit."
}
],
"sequencing_recommendation": [
Expand Down Expand Up @@ -358,6 +358,13 @@
"last_synced": "2026-05-12",
"ported_in": "item-b-phase-1-substrate",
"deviation": "OSS renumber 063\u2192035 (OSS alembic head was 034 when ported; private chain at 064 post-rebase). down_revision adjusted accordingly."
},
{
"path": "alembic/versions/036_partial_index_user_agent_slug_active.py",
"private_counterpart": "alembic/versions/080_partial_index_user_agent_slug_active.py",
"last_synced": "2026-05-21",
"ported_in": "cmpdl4-oss-port-slug-partial-index",
"deviation": "OSS renumber 080 \u2192 036 (OSS alembic head was 035 when ported; private chain at 079\u2192080 post PR #919's slug VARCHAR raise to 128). down_revision adjusted to 035. Schema/code verbatim from private's migration 080. Private additionally raised agents.slug from VARCHAR(64) to VARCHAR(128) in migration 079 (private PR #42 Substrate); that column-width raise is NOT ported here \u2014 OSS keeps VARCHAR(64). Source: private cueapi PR #921 merge_commit b770983e; G14 5/5 PASS verified on api.cueapi.ai @ commit 454aac3."
}
],
"app_core": [
Expand Down
Loading
Loading