diff --git a/BUILD_REPORT.md b/BUILD_REPORT.md index cc09658..58ae38d 100644 --- a/BUILD_REPORT.md +++ b/BUILD_REPORT.md @@ -2,129 +2,64 @@ ## sprint objective -Implement Sprint 5Q: extend the protected Gmail credential seam so the existing single-message ingestion path can renew an expired access token from refresh-token-backed credentials, while keeping Gmail account reads secret-free and leaving broader Gmail and Calendar scope deferred. +Implement Sprint 5R: extend the existing Gmail refresh path so single-message ingestion can persist and use a provider-rotated refresh token without exposing secrets in Gmail account reads or expanding into broader Gmail, Calendar, secret-manager, runner, compile-contract, or UI scope. ## completed work -- Added refresh-token-capable Gmail connect writes with an all-or-none refresh bundle: - - `refresh_token` - - `client_id` - - `client_secret` - - `access_token_expires_at` -- Extended the protected Gmail credential blob to support two narrow shapes: - - `gmail_oauth_access_token_v1` - - `gmail_oauth_refresh_token_v2` -- Added one explicit Gmail token-refresh path using `https://oauth2.googleapis.com/token`. -- Updated Gmail credential resolution so ingestion: - - uses the stored access token directly when the credential is still usable - - renews first when a refresh-capable credential is expired - - persists the refreshed access token and new expiry back into `gmail_account_credentials` -- Added a narrow protected-credential update store method for deterministic renewal writes. -- Added migration `20260316_0028_gmail_refresh_token_lifecycle.py` to: - - allow the new refreshable credential blob shape - - grant runtime `UPDATE` on `gmail_account_credentials` - - downgrade refreshable blobs back to the v1 access-token-only shape -- Kept Gmail account list/detail/connect responses secret-free on reads. -- Added unit and integration coverage for: - - refresh-token credential persistence - - successful renewal before single-message ingestion - - deterministic invalid refresh-credential failure - - secret-free account responses - - per-user isolation - - migration upgrade/downgrade compatibility -- Added direct unit coverage for the raw `refresh_gmail_access_token()` helper, including success, `400/401` rejection mapping, and malformed or transport refresh failures. -- Updated `ARCHITECTURE.md` so the implemented slice and Gmail connector flow reflect Sprint 5Q refresh-token lifecycle behavior. - -## exact Gmail refresh-token credential changes introduced - -- `GmailAccountConnectInput` now accepts optional `refresh_token`, `client_id`, `client_secret`, and `access_token_expires_at`. -- `ConnectGmailAccountRequest` enforces that those four refresh fields are all present together or all absent. -- Protected credential v1 remains: - -```json -{ - "credential_kind": "gmail_oauth_access_token_v1", - "access_token": "" -} -``` - -- New protected credential v2 is: - -```json -{ - "credential_kind": "gmail_oauth_refresh_token_v2", - "access_token": "", - "refresh_token": "", - "client_id": "", - "client_secret": "", - "access_token_expires_at": "" -} -``` - -- `gmail_account_credentials` now allows both v1 and v2 blobs and runtime `UPDATE` so renewal can rewrite the protected record in place. - -## token-renewal rule and renewal trigger used - -- Renewal trigger: if the protected credential kind is `gmail_oauth_refresh_token_v2` and `access_token_expires_at <= now(UTC)` during Gmail message ingestion. -- Renewal path: POST once to Google’s OAuth token endpoint with `client_id`, `client_secret`, `refresh_token`, and `grant_type=refresh_token`. -- Success behavior: store the new `access_token` plus a recalculated `access_token_expires_at`, then continue the existing single-message Gmail fetch with that refreshed token. -- Non-renewal behavior: if the credential is v1 or the v2 token has not yet expired, use the stored access token directly. -- Deterministic failure behavior: - - malformed local protected credentials -> `409` - - Google refresh rejection (`400`/`401`) -> `409` - - non-deterministic refresh transport/response failures -> `502` -- Failure guardrail: these failures occur before artifact registration, chunk ingestion, or filesystem writes for the Gmail message. +- Added rotation-aware Gmail refresh handling in `apps/api/src/alicebot_api/gmail.py`. +- Changed the refresh helper to capture an optional provider-returned replacement `refresh_token` alongside the renewed access token and expiry. +- Applied the protected-credential replacement rule: + - if Google returns a non-empty `refresh_token`, persist that replacement token + - otherwise keep the existing stored `refresh_token` +- Kept renewal writes inside the existing `gmail_account_credentials` seam and continued using the existing single-message ingestion path after a successful protected-credential update. +- Added a deterministic Gmail-specific persistence failure when renewed protected credentials cannot be written back, and mapped that failure to the existing `409` error envelope in `apps/api/src/alicebot_api/main.py`. +- Preserved secret-free Gmail account reads; no secret fields were added to list/detail/connect/ingest responses. +- Added unit coverage for: + - renewal without refresh-token rotation + - renewal with refresh-token rotation + - deterministic failure when protected-credential persistence fails + - raw refresh helper parsing of provider-returned rotated refresh tokens +- Added integration coverage for: + - renewal success without refresh-token rotation + - renewal success with refresh-token rotation + - deterministic failure when rotated credentials cannot be persisted + - unchanged secret-free response shape during the rotation-capable path ## incomplete work -- None inside Sprint 5Q scope. +- None inside Sprint 5R scope. ## files changed -- `apps/api/src/alicebot_api/contracts.py` - `apps/api/src/alicebot_api/gmail.py` - `apps/api/src/alicebot_api/main.py` -- `apps/api/src/alicebot_api/store.py` -- `apps/api/alembic/versions/20260316_0028_gmail_refresh_token_lifecycle.py` -- `ARCHITECTURE.md` - `tests/unit/test_gmail.py` - `tests/unit/test_gmail_main.py` - `tests/unit/test_gmail_refresh.py` -- `tests/unit/test_20260316_0028_gmail_refresh_token_lifecycle.py` - `tests/integration/test_gmail_accounts_api.py` -- `tests/integration/test_migrations.py` - `BUILD_REPORT.md` -## exact commands run +## tests run -- `./.venv/bin/python -m pytest tests/unit/test_gmail.py tests/unit/test_gmail_main.py tests/unit/test_20260316_0028_gmail_refresh_token_lifecycle.py` -- `./.venv/bin/python -m pytest tests/integration/test_gmail_accounts_api.py tests/integration/test_migrations.py` -- `./.venv/bin/python -m pytest tests/unit/test_gmail_refresh.py tests/unit/test_gmail.py tests/unit/test_gmail_main.py` - `./.venv/bin/python -m pytest tests/unit` + - Result: `437 passed in 0.64s` - `./.venv/bin/python -m pytest tests/integration` + - Result: `139 passed in 45.52s` -## tests run +## exact commands run -- `./.venv/bin/python -m pytest tests/unit/test_gmail.py tests/unit/test_gmail_main.py tests/unit/test_20260316_0028_gmail_refresh_token_lifecycle.py` - - Result: `27 passed in 0.52s` -- `./.venv/bin/python -m pytest tests/integration/test_gmail_accounts_api.py tests/integration/test_migrations.py` - - Result: `12 passed in 4.35s` -- `./.venv/bin/python -m pytest tests/unit/test_gmail_refresh.py tests/unit/test_gmail.py tests/unit/test_gmail_main.py` - - Result: `31 passed in 0.29s` +- `./.venv/bin/python -m pytest tests/unit/test_gmail.py tests/unit/test_gmail_refresh.py tests/unit/test_gmail_main.py` +- `./.venv/bin/python -m pytest tests/unit/test_gmail.py tests/unit/test_gmail_refresh.py tests/unit/test_gmail_main.py tests/integration/test_gmail_accounts_api.py -k 'gmail'` - `./.venv/bin/python -m pytest tests/unit` - - Result: `434 passed in 0.67s` - `./.venv/bin/python -m pytest tests/integration` - - Result: `137 passed in 41.99s` ## unit and integration test results -- Required unit suite passed. -- Required integration suite passed. -- Gmail-focused unit and integration coverage passed before the full packet-level runs. -- Direct unit coverage now pins the raw Gmail token-refresh helper’s success and error mapping behavior. -- Migration coverage now verifies both: - - the original Gmail credential hardening round trip - - the new refresh-token lifecycle migration round trip back to a downgrade-safe v1 blob +- Gmail-focused unit tests passed after updating the remaining stale test double to the new refresh return shape. +- One intermediate sandboxed combined Gmail run could not reach the local Postgres fixture on `localhost:5432`; this was an environment restriction, not an application test failure. +- Required final verification passed: + - `tests/unit`: `437 passed in 0.64s` + - `tests/integration`: `139 passed in 45.52s` ## one example Gmail account response proving secret-free reads remain intact @@ -134,7 +69,7 @@ Implement Sprint 5Q: extend the protected Gmail credential seam so the existing "id": "", "provider": "gmail", "auth_kind": "oauth_access_token", - "provider_account_id": "acct-owner-refresh-001", + "provider_account_id": "acct-owner-rotated-001", "email_address": "owner@gmail.example", "display_name": "Owner", "scope": "https://www.googleapis.com/auth/gmail.readonly", @@ -144,7 +79,7 @@ Implement Sprint 5Q: extend the protected Gmail credential seam so the existing } ``` -## one example Gmail ingestion response through the renewal-capable path +## one example Gmail ingestion response through the rotation-capable path ```json { @@ -152,7 +87,7 @@ Implement Sprint 5Q: extend the protected Gmail credential seam so the existing "id": "", "provider": "gmail", "auth_kind": "oauth_access_token", - "provider_account_id": "acct-owner-refresh-001", + "provider_account_id": "acct-owner-rotated-001", "email_address": "owner@gmail.example", "display_name": "Owner", "scope": "https://www.googleapis.com/auth/gmail.readonly", @@ -161,7 +96,7 @@ Implement Sprint 5Q: extend the protected Gmail credential seam so the existing }, "message": { "provider_message_id": "msg-001", - "artifact_relative_path": "gmail/acct-owner-refresh-001/msg-001.eml", + "artifact_relative_path": "gmail/acct-owner-rotated-001/msg-001.eml", "media_type": "message/rfc822" }, "artifact": { @@ -170,14 +105,14 @@ Implement Sprint 5Q: extend the protected Gmail credential seam so the existing "task_workspace_id": "", "status": "registered", "ingestion_status": "ingested", - "relative_path": "gmail/acct-owner-refresh-001/msg-001.eml", + "relative_path": "gmail/acct-owner-rotated-001/msg-001.eml", "media_type_hint": "message/rfc822", "created_at": "", "updated_at": "" }, "summary": { - "total_count": 1, - "total_characters": 18, + "total_count": "", + "total_characters": "", "media_type": "message/rfc822", "chunking_rule": "normalized_utf8_text_fixed_window_1000_chars_v1", "order": ["sequence_no_asc", "id_asc"] @@ -188,7 +123,7 @@ Implement Sprint 5Q: extend the protected Gmail credential seam so the existing ## blockers/issues - No remaining implementation blockers. -- Integration verification required local Postgres access outside the default sandbox. +- Integration verification required elevated access because the default sandbox blocked connections to the local Postgres test fixture on `localhost:5432`. ## what remains intentionally deferred to later milestones @@ -202,8 +137,7 @@ Implement Sprint 5Q: extend the protected Gmail credential seam so the existing - compile-contract changes - runner-style orchestration - UI work -- refresh-token rotation beyond the single explicit renewal path ## recommended next step -Keep the next Gmail milestone equally narrow: either add refresh-token rotation handling or move the protected Gmail secret seam into an external secret manager, but do not combine that work with search, sync, Calendar, or UI expansion. +Keep the next Gmail sprint narrow around one adjacent auth seam only, such as external secret-manager integration for the existing protected credential store, without combining it with search, sync, Calendar, or UI expansion. diff --git a/REVIEW_REPORT.md b/REVIEW_REPORT.md index 0e2c22c..0cc5b9c 100644 --- a/REVIEW_REPORT.md +++ b/REVIEW_REPORT.md @@ -6,21 +6,20 @@ PASS ## criteria met -- The sprint remains within the Sprint 5Q boundary. I found no Gmail search, sync, attachments, write actions, Calendar, compile-contract, runner, or UI scope in the implementation or follow-up changes. -- The protected Gmail credential seam supports the narrow refresh-token-capable credential shape required by the packet, without reintroducing secrets to `gmail_accounts`. -- Gmail account connect, list, and detail reads remain secret-free. Coverage asserts that `access_token`, `refresh_token`, and `client_secret` do not appear in account response payloads. -- The existing single-message Gmail ingestion path renews an expired access token through one explicit refresh path and then continues through the existing RFC822 artifact ingestion seam. -- Missing or invalid refresh credentials fail deterministically before fetch, artifact registration, chunk ingestion, or workspace writes. -- Per-user isolation remains intact through the existing user-scoped store access, row-level security, and the `(gmail_account_id, user_id)` ownership binding on protected credentials. -- Migration coverage includes the refresh-token lifecycle round trip and downgrade compatibility. -- The prior review follow-ups are now addressed: - - `ARCHITECTURE.md` reflects Sprint 5Q and the renewal-before-ingestion Gmail seam. - - `tests/unit/test_gmail_refresh.py` adds direct coverage for the raw refresh helper success path, `400/401` rejection mapping, and malformed or transport failures. -- `BUILD_REPORT.md` now matches the implemented Sprint 5Q behavior and includes the follow-up test coverage/doc updates. -- Verified locally: - - `./.venv/bin/python -m pytest tests/unit/test_gmail_refresh.py tests/unit/test_gmail.py tests/unit/test_gmail_main.py` -> `31 passed in 0.28s` - - `./.venv/bin/python -m pytest tests/unit` -> `434 passed in 0.64s` - - `./.venv/bin/python -m pytest tests/integration` -> `137 passed in 40.83s` from the prior sprint review run, with no subsequent code-path changes beyond docs and unit-test additions +- Sprint stayed narrow. The code changes are limited to the Gmail renewal seam, the ingest endpoint error mapping, Gmail-focused tests, and `BUILD_REPORT.md`. +- Rotated refresh tokens are now handled in the protected credential seam. `apps/api/src/alicebot_api/gmail.py` captures an optional provider-returned `refresh_token` during renewal and persists it back through `gmail_account_credentials`. +- The replacement rule matches the packet: + - if the provider returns a non-empty replacement `refresh_token`, persist it + - otherwise keep the existing stored `refresh_token` +- Gmail account reads remain secret-free. No secret fields were added to list/detail/connect/ingest responses, and existing account list/detail isolation tests still pass. +- Single-message Gmail ingestion still works for both cases: + - stable refresh-token renewal + - rotated refresh-token renewal +- Rotated-credential persistence failures are deterministic and happen before Gmail fetch/artifact writes. The new error is mapped to the existing `409` envelope, and tests verify no artifact or credential corruption when persistence fails. +- Required verification passed: + - `./.venv/bin/python -m pytest tests/unit` -> `437 passed in 0.63s` + - `./.venv/bin/python -m pytest tests/integration` -> `139 passed in 42.27s` +- No out-of-scope Gmail search, sync, attachments, write actions, Calendar, external secret-manager, compile-contract, runner, or UI work entered the sprint. ## criteria missed @@ -28,24 +27,24 @@ PASS ## quality issues -- No blocking implementation, test, documentation, regression, or scope issues remain for Sprint 5Q. +- None material for Sprint 5R. ## regression risks -- Residual risk is limited to intentionally deferred scope already called out by the sprint packet: Gmail search, sync, attachments, write actions, Calendar, external secret-manager integration, compile-contract changes, runner orchestration, UI work, and refresh-token rotation beyond the single explicit renewal path. +- Low. The change is localized to the existing Gmail renewal path and is covered by both unit and Postgres-backed integration tests for stable-token renewal, rotated-token renewal, failure handling, secret-free responses, and user isolation. ## docs issues -- None. +- None. `BUILD_REPORT.md` includes the rotation change summary, replacement rule, commands run, test results, secret-free account example, rotation-capable ingestion example, and deferred scope. ## should anything be added to RULES.md? -- No. +- No. This is a narrow connector implementation detail, not a new repository-wide rule. ## should anything update ARCHITECTURE.md? -- No further architecture update is required for this sprint. +- No. The sprint does not introduce a new architectural boundary or subsystem; it hardens the existing Gmail protected-credential seam. ## recommended next action -- Accept Sprint 5Q as complete and proceed with the normal merge/approval flow. Keep the next Gmail milestone equally narrow and avoid combining secret-lifecycle work with search, sync, Calendar, or UI expansion. +- Accept Sprint 5R and move to the next narrow auth-adjacent milestone without broadening scope. diff --git a/apps/api/src/alicebot_api/gmail.py b/apps/api/src/alicebot_api/gmail.py index 5d60a33..016c8c3 100644 --- a/apps/api/src/alicebot_api/gmail.py +++ b/apps/api/src/alicebot_api/gmail.py @@ -39,7 +39,7 @@ TaskArtifactIngestInput, TaskArtifactRegisterInput, ) -from alicebot_api.store import ContinuityStore, GmailAccountRow +from alicebot_api.store import ContinuityStore, ContinuityStoreInvariantError, GmailAccountRow from alicebot_api.workspaces import TaskWorkspaceNotFoundError GMAIL_MESSAGE_FETCH_TIMEOUT_SECONDS = 30 @@ -81,6 +81,10 @@ class GmailCredentialRefreshError(RuntimeError): """Raised when Gmail access-token renewal fails for non-deterministic reasons.""" +class GmailCredentialPersistenceError(RuntimeError): + """Raised when renewed Gmail protected credentials cannot be persisted.""" + + class GmailCredentialValidationError(ValueError): """Raised when Gmail connect input contains an invalid credential combination.""" @@ -95,6 +99,13 @@ class ParsedGmailCredential: access_token_expires_at: datetime | None = None +@dataclass(frozen=True, slots=True) +class RefreshedGmailCredential: + access_token: str + access_token_expires_at: datetime + refresh_token: str | None = None + + def serialize_gmail_account_row(row: GmailAccountRow) -> GmailAccountRecord: return { "id": str(row["id"]), @@ -242,7 +253,7 @@ def refresh_gmail_access_token( refresh_token: str, client_id: str, client_secret: str, -) -> tuple[str, datetime]: +) -> RefreshedGmailCredential: request = Request( GMAIL_TOKEN_REFRESH_URL, data=urlencode( @@ -277,6 +288,7 @@ def refresh_gmail_access_token( ) from exc refreshed_access_token = _coerce_nonempty_string(payload.get("access_token")) + replacement_refresh_token = _coerce_nonempty_string(payload.get("refresh_token")) expires_in = payload.get("expires_in") if refreshed_access_token is None or not isinstance(expires_in, (int, float)) or expires_in <= 0: raise GmailCredentialRefreshError( @@ -284,7 +296,42 @@ def refresh_gmail_access_token( ) refreshed_expires_at = datetime.now(UTC) + timedelta(seconds=float(expires_in)) - return refreshed_access_token, refreshed_expires_at + return RefreshedGmailCredential( + access_token=refreshed_access_token, + access_token_expires_at=refreshed_expires_at, + refresh_token=replacement_refresh_token, + ) + + +def _persist_refreshed_gmail_credential( + store: ContinuityStore, + *, + gmail_account_id: UUID, + auth_kind: str, + existing_credential: ParsedGmailCredential, + refreshed_credential: RefreshedGmailCredential, +) -> None: + replacement_refresh_token = ( + refreshed_credential.refresh_token + if refreshed_credential.refresh_token is not None + else existing_credential.refresh_token + ) + try: + store.update_gmail_account_credential( + gmail_account_id=gmail_account_id, + auth_kind=auth_kind, + credential_blob=build_gmail_protected_credential_blob( + access_token=refreshed_credential.access_token, + refresh_token=replacement_refresh_token, + client_id=existing_credential.client_id, + client_secret=existing_credential.client_secret, + access_token_expires_at=refreshed_credential.access_token_expires_at, + ), + ) + except (ContinuityStoreInvariantError, psycopg.Error) as exc: + raise GmailCredentialPersistenceError( + f"gmail account {gmail_account_id} renewed protected credentials could not be persisted" + ) from exc def resolve_gmail_access_token( @@ -314,24 +361,20 @@ def resolve_gmail_access_token( ): return parsed_credential.access_token - refreshed_access_token, refreshed_expires_at = refresh_gmail_access_token( + refreshed_credential = refresh_gmail_access_token( gmail_account_id=gmail_account_id, refresh_token=parsed_credential.refresh_token, client_id=parsed_credential.client_id, client_secret=parsed_credential.client_secret, ) - store.update_gmail_account_credential( + _persist_refreshed_gmail_credential( + store, gmail_account_id=gmail_account_id, auth_kind=credential["auth_kind"], - credential_blob=build_gmail_protected_credential_blob( - access_token=refreshed_access_token, - refresh_token=parsed_credential.refresh_token, - client_id=parsed_credential.client_id, - client_secret=parsed_credential.client_secret, - access_token_expires_at=refreshed_expires_at, - ), + existing_credential=parsed_credential, + refreshed_credential=refreshed_credential, ) - return refreshed_access_token + return refreshed_credential.access_token def create_gmail_account_record( diff --git a/apps/api/src/alicebot_api/main.py b/apps/api/src/alicebot_api/main.py index 7382b06..f23b11e 100644 --- a/apps/api/src/alicebot_api/main.py +++ b/apps/api/src/alicebot_api/main.py @@ -142,6 +142,7 @@ GmailAccountAlreadyExistsError, GmailCredentialInvalidError, GmailCredentialNotFoundError, + GmailCredentialPersistenceError, GmailCredentialRefreshError, GmailCredentialValidationError, GmailAccountNotFoundError, @@ -1404,7 +1405,11 @@ def ingest_gmail_message( return JSONResponse(status_code=404, content={"detail": str(exc)}) except GmailMessageUnsupportedError as exc: return JSONResponse(status_code=400, content={"detail": str(exc)}) - except (GmailCredentialNotFoundError, GmailCredentialInvalidError) as exc: + except ( + GmailCredentialNotFoundError, + GmailCredentialInvalidError, + GmailCredentialPersistenceError, + ) as exc: return JSONResponse(status_code=409, content={"detail": str(exc)}) except TaskArtifactValidationError as exc: return JSONResponse(status_code=400, content={"detail": str(exc)}) diff --git a/tests/integration/test_gmail_accounts_api.py b/tests/integration/test_gmail_accounts_api.py index 8385474..ec4c7aa 100644 --- a/tests/integration/test_gmail_accounts_api.py +++ b/tests/integration/test_gmail_accounts_api.py @@ -397,7 +397,10 @@ def test_gmail_message_ingestion_endpoint_renews_expired_access_token( monkeypatch.setattr( gmail_module, "refresh_gmail_access_token", - lambda **_kwargs: ("token-refreshed", datetime.fromisoformat("2030-01-01T00:05:00+00:00")), + lambda **_kwargs: gmail_module.RefreshedGmailCredential( + access_token="token-refreshed", + access_token_expires_at=datetime.fromisoformat("2030-01-01T00:05:00+00:00"), + ), ) def fake_fetch_gmail_message_raw_bytes(*, access_token: str, **_kwargs) -> bytes: @@ -474,6 +477,234 @@ def fake_fetch_gmail_message_raw_bytes(*, access_token: str, **_kwargs) -> bytes assert credential_row[5] == "2030-01-01T00:05:00+00:00" +def test_gmail_message_ingestion_endpoint_persists_rotated_refresh_token( + migrated_database_urls, + monkeypatch, + tmp_path, +) -> None: + owner = seed_task(migrated_database_urls["app"], email="owner@example.com") + workspace_root = tmp_path / "task-workspaces" + monkeypatch.setattr( + main_module, + "get_settings", + lambda: Settings( + database_url=migrated_database_urls["app"], + task_workspace_root=str(workspace_root), + ), + ) + raw_bytes = _build_rfc822_email_bytes(subject="Inbox Update", plain_body="rotated token path") + fetch_tokens: list[str] = [] + + monkeypatch.setattr( + gmail_module, + "refresh_gmail_access_token", + lambda **_kwargs: gmail_module.RefreshedGmailCredential( + access_token="token-refreshed-rotated", + access_token_expires_at=datetime.fromisoformat("2030-01-01T00:05:00+00:00"), + refresh_token="refresh-owner-rotated-002", + ), + ) + + def fake_fetch_gmail_message_raw_bytes(*, access_token: str, **_kwargs) -> bytes: + fetch_tokens.append(access_token) + return raw_bytes + + monkeypatch.setattr( + gmail_module, + "fetch_gmail_message_raw_bytes", + fake_fetch_gmail_message_raw_bytes, + ) + + account_status, account_payload = _connect_gmail_account( + user_id=owner["user_id"], + provider_account_id="acct-owner-rotated-001", + email_address="owner@gmail.example", + credential_overrides={ + "refresh_token": "refresh-owner-001", + "client_id": "client-owner-001", + "client_secret": "secret-owner-001", + "access_token_expires_at": "2020-01-01T00:00:00+00:00", + }, + ) + workspace_status, workspace_payload = invoke_request( + "POST", + f"/v0/tasks/{owner['task_id']}/workspace", + payload={"user_id": str(owner["user_id"])}, + ) + ingest_status, ingest_payload = invoke_request( + "POST", + f"/v0/gmail-accounts/{account_payload['account']['id']}/messages/msg-001/ingest", + payload={ + "user_id": str(owner["user_id"]), + "task_workspace_id": workspace_payload["workspace"]["id"], + }, + ) + + assert account_status == 201 + assert workspace_status == 201 + assert ingest_status == 200 + assert fetch_tokens == ["token-refreshed-rotated"] + assert ingest_payload == { + "account": { + **account_payload["account"], + }, + "message": { + "provider_message_id": "msg-001", + "artifact_relative_path": "gmail/acct-owner-rotated-001/msg-001.eml", + "media_type": "message/rfc822", + }, + "artifact": { + "id": ingest_payload["artifact"]["id"], + "task_id": str(owner["task_id"]), + "task_workspace_id": workspace_payload["workspace"]["id"], + "status": "registered", + "ingestion_status": "ingested", + "relative_path": "gmail/acct-owner-rotated-001/msg-001.eml", + "media_type_hint": "message/rfc822", + "created_at": ingest_payload["artifact"]["created_at"], + "updated_at": ingest_payload["artifact"]["updated_at"], + }, + "summary": { + "total_count": ingest_payload["summary"]["total_count"], + "total_characters": ingest_payload["summary"]["total_characters"], + "media_type": "message/rfc822", + "chunking_rule": "normalized_utf8_text_fixed_window_1000_chars_v1", + "order": ["sequence_no_asc", "id_asc"], + }, + } + assert '"refresh_token":' not in json.dumps(ingest_payload) + assert '"client_secret":' not in json.dumps(ingest_payload) + + with psycopg.connect(migrated_database_urls["admin"]) as conn: + with conn.cursor() as cur: + cur.execute( + """ + SELECT + credential_blob ->> 'credential_kind', + credential_blob ->> 'access_token', + credential_blob ->> 'refresh_token', + credential_blob ->> 'client_id', + credential_blob ->> 'client_secret', + credential_blob ->> 'access_token_expires_at' + FROM gmail_account_credentials + WHERE gmail_account_id = %s + """, + (UUID(account_payload["account"]["id"]),), + ) + credential_row = cur.fetchone() + + assert credential_row is not None + assert credential_row[0] == "gmail_oauth_refresh_token_v2" + assert credential_row[1] == "token-refreshed-rotated" + assert credential_row[2] == "refresh-owner-rotated-002" + assert credential_row[3] == "client-owner-001" + assert credential_row[4] == "secret-owner-001" + assert credential_row[5] == "2030-01-01T00:05:00+00:00" + + +def test_gmail_message_ingestion_endpoint_fails_deterministically_when_rotated_credentials_cannot_be_persisted( + migrated_database_urls, + monkeypatch, + tmp_path, +) -> None: + owner = seed_task(migrated_database_urls["app"], email="owner@example.com") + workspace_root = tmp_path / "task-workspaces" + monkeypatch.setattr( + main_module, + "get_settings", + lambda: Settings( + database_url=migrated_database_urls["app"], + task_workspace_root=str(workspace_root), + ), + ) + + monkeypatch.setattr( + gmail_module, + "refresh_gmail_access_token", + lambda **_kwargs: gmail_module.RefreshedGmailCredential( + access_token="token-refreshed-rotated", + access_token_expires_at=datetime.fromisoformat("2030-01-01T00:05:00+00:00"), + refresh_token="refresh-owner-rotated-002", + ), + ) + + def fail_update_gmail_account_credential(self, **_kwargs): + raise psycopg.Error("simulated credential persistence failure") + + def fail_fetch(**_kwargs): + raise AssertionError("fetch_gmail_message_raw_bytes should not be called") + + monkeypatch.setattr( + ContinuityStore, + "update_gmail_account_credential", + fail_update_gmail_account_credential, + ) + monkeypatch.setattr(gmail_module, "fetch_gmail_message_raw_bytes", fail_fetch) + + _, account_payload = _connect_gmail_account( + user_id=owner["user_id"], + provider_account_id="acct-owner-rotated-001", + email_address="owner@gmail.example", + credential_overrides={ + "refresh_token": "refresh-owner-001", + "client_id": "client-owner-001", + "client_secret": "secret-owner-001", + "access_token_expires_at": "2020-01-01T00:00:00+00:00", + }, + ) + _, workspace_payload = invoke_request( + "POST", + f"/v0/tasks/{owner['task_id']}/workspace", + payload={"user_id": str(owner["user_id"])}, + ) + + ingest_status, ingest_payload = invoke_request( + "POST", + f"/v0/gmail-accounts/{account_payload['account']['id']}/messages/msg-001/ingest", + payload={ + "user_id": str(owner["user_id"]), + "task_workspace_id": workspace_payload["workspace"]["id"], + }, + ) + + assert ingest_status == 409 + assert ingest_payload == { + "detail": ( + f"gmail account {account_payload['account']['id']} renewed protected credentials " + "could not be persisted" + ) + } + artifact_file = ( + Path(workspace_payload["workspace"]["local_path"]) / "gmail" / "acct-owner-rotated-001" / "msg-001.eml" + ) + assert not artifact_file.exists() + + with user_connection(migrated_database_urls["app"], owner["user_id"]) as conn: + store = ContinuityStore(conn) + assert store.list_task_artifacts_for_task(owner["task_id"]) == [] + + with psycopg.connect(migrated_database_urls["admin"]) as conn: + with conn.cursor() as cur: + cur.execute( + """ + SELECT + credential_blob ->> 'access_token', + credential_blob ->> 'refresh_token', + credential_blob ->> 'access_token_expires_at' + FROM gmail_account_credentials + WHERE gmail_account_id = %s + """, + (UUID(account_payload["account"]["id"]),), + ) + credential_row = cur.fetchone() + + assert credential_row == ( + f"token-for-{account_payload['account']['provider_account_id']}", + "refresh-owner-001", + "2020-01-01T00:00:00+00:00", + ) + + def test_gmail_message_ingestion_endpoint_rejects_cross_user_workspace_access( migrated_database_urls, monkeypatch, diff --git a/tests/unit/test_gmail.py b/tests/unit/test_gmail.py index f482807..3242532 100644 --- a/tests/unit/test_gmail.py +++ b/tests/unit/test_gmail.py @@ -19,8 +19,10 @@ GmailAccountNotFoundError, GmailCredentialInvalidError, GmailCredentialNotFoundError, + GmailCredentialPersistenceError, GmailCredentialValidationError, GmailMessageUnsupportedError, + RefreshedGmailCredential, build_gmail_message_artifact_relative_path, build_gmail_protected_credential_blob, create_gmail_account_record, @@ -29,6 +31,7 @@ list_gmail_account_records, resolve_gmail_access_token, ) +from alicebot_api.store import ContinuityStoreInvariantError from alicebot_api.workspaces import TaskWorkspaceNotFoundError @@ -460,7 +463,10 @@ def test_resolve_gmail_access_token_renews_expired_refreshable_credential(monkey monkeypatch.setattr( "alicebot_api.gmail.refresh_gmail_access_token", - lambda **_kwargs: ("token-2", refreshed_at), + lambda **_kwargs: RefreshedGmailCredential( + access_token="token-2", + access_token_expires_at=refreshed_at, + ), ) assert resolve_gmail_access_token(store, gmail_account_id=account_id) == "token-2" @@ -478,6 +484,94 @@ def test_resolve_gmail_access_token_renews_expired_refreshable_credential(monkey ] +def test_resolve_gmail_access_token_persists_rotated_refresh_token(monkeypatch) -> None: + store = GmailStoreStub() + expired_at = datetime(2020, 1, 1, 0, 0, tzinfo=UTC) + refreshed_at = datetime(2030, 1, 1, 0, 5, tzinfo=UTC) + account = create_gmail_account_record( + store, + user_id=uuid4(), + request=GmailAccountConnectInput( + provider_account_id="acct-001", + email_address="owner@example.com", + display_name="Owner", + scope=GMAIL_READONLY_SCOPE, + access_token="token-1", + refresh_token="refresh-1", + client_id="client-1", + client_secret="secret-1", + access_token_expires_at=expired_at, + ), + )["account"] + account_id = UUID(account["id"]) + + monkeypatch.setattr( + "alicebot_api.gmail.refresh_gmail_access_token", + lambda **_kwargs: RefreshedGmailCredential( + access_token="token-2", + access_token_expires_at=refreshed_at, + refresh_token="refresh-2", + ), + ) + + assert resolve_gmail_access_token(store, gmail_account_id=account_id) == "token-2" + assert store.gmail_account_credentials[account_id]["credential_blob"] == { + "credential_kind": GMAIL_REFRESHABLE_PROTECTED_CREDENTIAL_KIND, + "access_token": "token-2", + "refresh_token": "refresh-2", + "client_id": "client-1", + "client_secret": "secret-1", + "access_token_expires_at": refreshed_at.isoformat(), + } + + +def test_resolve_gmail_access_token_fails_deterministically_when_persisting_refresh_update_fails( + monkeypatch, +) -> None: + store = GmailStoreStub() + expired_at = datetime(2020, 1, 1, 0, 0, tzinfo=UTC) + refreshed_at = datetime(2030, 1, 1, 0, 5, tzinfo=UTC) + account = create_gmail_account_record( + store, + user_id=uuid4(), + request=GmailAccountConnectInput( + provider_account_id="acct-001", + email_address="owner@example.com", + display_name="Owner", + scope=GMAIL_READONLY_SCOPE, + access_token="token-1", + refresh_token="refresh-1", + client_id="client-1", + client_secret="secret-1", + access_token_expires_at=expired_at, + ), + )["account"] + account_id = UUID(account["id"]) + original_blob = dict(store.gmail_account_credentials[account_id]["credential_blob"]) + + monkeypatch.setattr( + "alicebot_api.gmail.refresh_gmail_access_token", + lambda **_kwargs: RefreshedGmailCredential( + access_token="token-2", + access_token_expires_at=refreshed_at, + refresh_token="refresh-2", + ), + ) + + def fail_update(**_kwargs): + raise ContinuityStoreInvariantError("update_gmail_account_credential did not return a row") + + monkeypatch.setattr(store, "update_gmail_account_credential", fail_update) + + with pytest.raises( + GmailCredentialPersistenceError, + match=f"gmail account {account_id} renewed protected credentials could not be persisted", + ): + resolve_gmail_access_token(store, gmail_account_id=account_id) + + assert store.gmail_account_credentials[account_id]["credential_blob"] == original_blob + + def test_resolve_gmail_access_token_rejects_invalid_refreshable_protected_credentials() -> None: store = GmailStoreStub() account = create_gmail_account_record( @@ -662,7 +756,10 @@ def test_ingest_gmail_message_record_renews_expired_access_token_before_fetch( monkeypatch.setattr( "alicebot_api.gmail.refresh_gmail_access_token", - lambda **_kwargs: ("token-refreshed", datetime(2030, 1, 1, 0, 5, tzinfo=UTC)), + lambda **_kwargs: RefreshedGmailCredential( + access_token="token-refreshed", + access_token_expires_at=datetime(2030, 1, 1, 0, 5, tzinfo=UTC), + ), ) def fake_fetch(**kwargs): diff --git a/tests/unit/test_gmail_main.py b/tests/unit/test_gmail_main.py index 24adb3a..2eaf011 100644 --- a/tests/unit/test_gmail_main.py +++ b/tests/unit/test_gmail_main.py @@ -12,6 +12,7 @@ GmailAccountNotFoundError, GmailCredentialInvalidError, GmailCredentialNotFoundError, + GmailCredentialPersistenceError, GmailCredentialRefreshError, GmailCredentialValidationError, GmailMessageFetchError, @@ -266,6 +267,25 @@ def fake_invalid_credentials(*_args, **_kwargs): "detail": f"gmail account {gmail_account_id} has invalid protected credentials" } + def fake_persistence_error(*_args, **_kwargs): + raise GmailCredentialPersistenceError( + f"gmail account {gmail_account_id} renewed protected credentials could not be persisted" + ) + + monkeypatch.setattr(main_module, "ingest_gmail_message_record", fake_persistence_error) + response = main_module.ingest_gmail_message( + gmail_account_id, + "msg-001", + main_module.IngestGmailMessageRequest( + user_id=user_id, + task_workspace_id=task_workspace_id, + ), + ) + assert response.status_code == 409 + assert json.loads(response.body) == { + "detail": f"gmail account {gmail_account_id} renewed protected credentials could not be persisted" + } + def fake_fetch_error(*_args, **_kwargs): raise GmailMessageFetchError("gmail message msg-001 could not be fetched") diff --git a/tests/unit/test_gmail_refresh.py b/tests/unit/test_gmail_refresh.py index 779e454..da7c9d7 100644 --- a/tests/unit/test_gmail_refresh.py +++ b/tests/unit/test_gmail_refresh.py @@ -14,6 +14,7 @@ GMAIL_TOKEN_REFRESH_URL, GmailCredentialInvalidError, GmailCredentialRefreshError, + RefreshedGmailCredential, refresh_gmail_access_token, ) @@ -59,7 +60,7 @@ def fake_urlopen(request, timeout: int): monkeypatch.setattr("alicebot_api.gmail.urlopen", fake_urlopen) started_at = datetime.now(UTC) - access_token, expires_at = refresh_gmail_access_token( + refreshed_credential = refresh_gmail_access_token( gmail_account_id=gmail_account_id, refresh_token="refresh-001", client_id="client-001", @@ -67,8 +68,14 @@ def fake_urlopen(request, timeout: int): ) finished_at = datetime.now(UTC) - assert access_token == "token-refreshed" - assert started_at + timedelta(seconds=3590) <= expires_at <= finished_at + timedelta(seconds=3610) + assert refreshed_credential == RefreshedGmailCredential( + access_token="token-refreshed", + access_token_expires_at=refreshed_credential.access_token_expires_at, + refresh_token=None, + ) + assert started_at + timedelta(seconds=3590) <= refreshed_credential.access_token_expires_at <= ( + finished_at + timedelta(seconds=3610) + ) assert seen == { "url": GMAIL_TOKEN_REFRESH_URL, "timeout": GMAIL_TOKEN_REFRESH_TIMEOUT_SECONDS, @@ -83,6 +90,35 @@ def fake_urlopen(request, timeout: int): } +def test_refresh_gmail_access_token_returns_rotated_refresh_token_when_provider_supplies_one( + monkeypatch, +) -> None: + gmail_account_id = uuid4() + + def fake_urlopen(_request, timeout: int): + assert timeout == GMAIL_TOKEN_REFRESH_TIMEOUT_SECONDS + return _FakeHTTPResponse( + json.dumps( + { + "access_token": "token-refreshed", + "expires_in": 3600, + "refresh_token": "refresh-rotated", + } + ).encode("utf-8") + ) + + monkeypatch.setattr("alicebot_api.gmail.urlopen", fake_urlopen) + + refreshed_credential = refresh_gmail_access_token( + gmail_account_id=gmail_account_id, + refresh_token="refresh-001", + client_id="client-001", + client_secret="secret-001", + ) + + assert refreshed_credential.refresh_token == "refresh-rotated" + + @pytest.mark.parametrize("status_code", [400, 401]) def test_refresh_gmail_access_token_maps_invalid_refresh_rejections_to_invalid_error( monkeypatch,