From 1f99b1f743bfed50ac56dc0b1629dc09f8a41c11 Mon Sep 17 00:00:00 2001 From: patrick-chinchill Date: Thu, 23 Apr 2026 18:25:05 -0700 Subject: [PATCH 1/4] feat(attachments): port rehydrateAttachment adapter hook + Thread serialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Upstream Adapter.rehydrateAttachment rebuilds the fetch_data download closure after a JSON roundtrip through the state adapter — essential for queue/debounce concurrency strategies, where entries pass through JSON.stringify and lose any callable fields. This PR ports the hook to Python: Adapter gains an optional rehydrate_attachment method (default no-op on BaseAdapter), Attachment gains a serializable fetch_metadata dict, and Chat._rehydrate_message now threads the active adapter and invokes the hook on any attachment whose fetch_data was stripped. Per-adapter implementations land on Slack (url + teamId), Teams (url), Google Chat (resourceName + url), Telegram (fileId), and WhatsApp (mediaId); Discord, GitHub and Linear intentionally do not implement it (upstream parity — they use public URLs or have no file attachments). Closes 3 [concurrency: queue attachment rehydration] fidelity gaps. Refs #52. Co-Authored-By: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 12 ++ src/chat_sdk/adapters/google_chat/adapter.py | 115 ++++++---- src/chat_sdk/adapters/slack/adapter.py | 104 +++++++-- src/chat_sdk/adapters/teams/adapter.py | 44 +++- src/chat_sdk/adapters/telegram/adapter.py | 26 +++ src/chat_sdk/adapters/whatsapp/adapter.py | 25 +++ src/chat_sdk/chat.py | 140 ++++++++---- src/chat_sdk/types.py | 37 +++- tests/test_chat_faithful.py | 216 +++++++++++++++++++ tests/test_google_chat_adapter.py | 43 ++++ tests/test_slack_webhook.py | 94 ++++++++ tests/test_teams_adapter.py | 56 +++++ tests/test_telegram_adapter.py | 37 ++++ tests/test_whatsapp_webhook.py | 29 +++ 14 files changed, 878 insertions(+), 100 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8db74fb..6ff0052 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## Unreleased +<<<<<<< HEAD Parity catch-up with upstream `4.26.0`. No upstream version change. ### New public APIs @@ -40,6 +41,17 @@ Parity catch-up with upstream `4.26.0`. No upstream version change. `PostableObject` wrapping an async iterable with platform-specific streaming options (`group_tasks`, `end_with`, `update_interval_ms`). Mirrors upstream `streaming-plan.ts`. Issue #56. +- **`Adapter.rehydrate_attachment` hook + `Attachment.fetch_metadata`**: + port of upstream's `rehydrateAttachment` hook. `Chat._rehydrate_message` + invokes the hook on every attachment that lost its `fetch_data` closure + during a JSON roundtrip (queue / debounce / persistent state). The new + serializable `fetch_metadata: dict[str, str] | None` field persists + adapter-specific identifiers (Slack `url` + `teamId`, Teams `url`, + Google Chat `resourceName` + `url`, Telegram `fileId`, WhatsApp + `mediaId`). Implementations land on Slack, Teams, Google Chat, Telegram, + and WhatsApp. Each rehydrate closure validates the target URL against a + per-adapter allowlist before forwarding the auth token (SSRF defense). + Closes #52. ### Upstream parity diff --git a/src/chat_sdk/adapters/google_chat/adapter.py b/src/chat_sdk/adapters/google_chat/adapter.py index 143f68e..2dc4524 100644 --- a/src/chat_sdk/adapters/google_chat/adapter.py +++ b/src/chat_sdk/adapters/google_chat/adapter.py @@ -2612,46 +2612,15 @@ def _create_attachment(self, att: dict[str, Any]) -> Attachment: elif content_type.startswith("audio/"): att_type = "audio" - # Build fetchData closure + fetch_meta: dict[str, str] = {} + if resource_name: + fetch_meta["resourceName"] = resource_name + if url: + fetch_meta["url"] = url + fetch_data: Callable[[], Awaitable[bytes]] | None = None if resource_name or url: - adapter = self - - async def _fetch_data() -> bytes: - # Prefer media.download API - if resource_name: - token = await adapter._get_access_token() - download_url = f"https://chat.googleapis.com/v1/media/{resource_name}?alt=media" - session = await adapter._get_http_session() - async with session.get( - download_url, - headers={"Authorization": f"Bearer {token}"}, - ) as response: - if response.status >= 400: - raise NetworkError( - "gchat", - f"Failed to download media: {response.status}", - ) - return await response.read() - - # Fallback to direct URL fetch (downloadUri) - if url: - token = await adapter._get_access_token() - session = await adapter._get_http_session() - async with session.get( - url, - headers={"Authorization": f"Bearer {token}"}, - ) as response: - if response.status >= 400: - raise NetworkError( - "gchat", - f"Failed to fetch file: {response.status}", - ) - return await response.read() - - raise AuthenticationError("gchat", "Cannot fetch file: no URL or resource name") - - fetch_data = _fetch_data + fetch_data = self._build_gchat_fetch_data(resource_name, url) return Attachment( type=att_type, # type: ignore[arg-type] @@ -2659,6 +2628,76 @@ async def _fetch_data() -> bytes: name=att.get("contentName"), mime_type=att.get("contentType"), fetch_data=fetch_data, + fetch_metadata=fetch_meta or None, + ) + + def _build_gchat_fetch_data( + self, + resource_name: str | None, + url: str | None, + ) -> Callable[[], Awaitable[bytes]]: + """Build a lazy ``fetch_data`` closure for a Google Chat attachment.""" + adapter = self + + async def _fetch_data() -> bytes: + # Prefer media.download API + if resource_name: + token = await adapter._get_access_token() + download_url = f"https://chat.googleapis.com/v1/media/{resource_name}?alt=media" + session = await adapter._get_http_session() + async with session.get( + download_url, + headers={"Authorization": f"Bearer {token}"}, + ) as response: + if response.status >= 400: + raise NetworkError( + "gchat", + f"Failed to download media: {response.status}", + ) + return await response.read() + + # Fallback to direct URL fetch (downloadUri) + if url: + token = await adapter._get_access_token() + session = await adapter._get_http_session() + async with session.get( + url, + headers={"Authorization": f"Bearer {token}"}, + ) as response: + if response.status >= 400: + raise NetworkError( + "gchat", + f"Failed to fetch file: {response.status}", + ) + return await response.read() + + raise AuthenticationError("gchat", "Cannot fetch file: no URL or resource name") + + return _fetch_data + + def rehydrate_attachment(self, attachment: Attachment) -> Attachment: + """Reconstruct ``fetch_data`` on a deserialized Google Chat attachment. + + Pulls ``resourceName`` (preferred, used with media.download API) and + ``url`` (fallback) from ``fetch_metadata``. Returns the attachment + unchanged when neither identifier is present. + """ + meta = attachment.fetch_metadata or {} + resource_name = meta.get("resourceName") + url = meta.get("url") or attachment.url + if not (resource_name or url): + return attachment + return Attachment( + type=attachment.type, + url=attachment.url, + name=attachment.name, + mime_type=attachment.mime_type, + size=attachment.size, + width=attachment.width, + height=attachment.height, + data=attachment.data, + fetch_data=self._build_gchat_fetch_data(resource_name, url), + fetch_metadata=attachment.fetch_metadata, ) # ========================================================================= diff --git a/src/chat_sdk/adapters/slack/adapter.py b/src/chat_sdk/adapters/slack/adapter.py index 6494fde..b665c0f 100644 --- a/src/chat_sdk/adapters/slack/adapter.py +++ b/src/chat_sdk/adapters/slack/adapter.py @@ -1791,7 +1791,10 @@ async def _parse_slack_message( edited=bool(event.get("edited")), edited_at=edited_at, ), - attachments=[self._create_attachment(f) for f in event.get("files", [])], + attachments=[ + self._create_attachment(f, team_id=event.get("team") or event.get("team_id")) + for f in event.get("files", []) + ], links=self._extract_links(event), ) @@ -1833,12 +1836,21 @@ def _parse_slack_message_sync(self, event: dict[str, Any], thread_id: str) -> Me edited=bool(event.get("edited")), edited_at=edited_at, ), - attachments=[self._create_attachment(f) for f in event.get("files", [])], + attachments=[ + self._create_attachment(f, team_id=event.get("team") or event.get("team_id")) + for f in event.get("files", []) + ], links=self._extract_links(event), ) - def _create_attachment(self, file: dict[str, Any]) -> Attachment: - """Create an Attachment from a Slack file object.""" + def _create_attachment(self, file: dict[str, Any], team_id: str | None = None) -> Attachment: + """Create an Attachment from a Slack file object. + + ``team_id`` identifies the workspace the file belongs to and is + stored in ``fetch_metadata`` so :meth:`rehydrate_attachment` can + rebuild the download closure (with workspace-specific token) after + the queue/debounce path JSON-serializes the message. + """ url = file.get("url_private") # Capture token at creation time (during webhook processing) bot_token = self._get_token() @@ -1853,18 +1865,13 @@ def _create_attachment(self, file: dict[str, Any]) -> Attachment: att_type = "audio" async def fetch_data() -> bytes: - import httpx - - async with httpx.AsyncClient() as http: - resp = await http.get(url, headers={"Authorization": f"Bearer {bot_token}"}) # type: ignore[arg-type] - resp.raise_for_status() - content_type = resp.headers.get("content-type", "") - if "text/html" in content_type: - raise RuntimeError( - "Failed to download file from Slack: received HTML login page. " - 'Ensure your Slack app has the "files:read" OAuth scope.' - ) - return resp.content + return await self._fetch_slack_file(url, bot_token) # type: ignore[arg-type] + + fetch_meta: dict[str, str] = {} + if url: + fetch_meta["url"] = url + if team_id: + fetch_meta["teamId"] = team_id return Attachment( type=att_type, # type: ignore[arg-type] @@ -1875,6 +1882,71 @@ async def fetch_data() -> bytes: width=file.get("original_w"), height=file.get("original_h"), fetch_data=fetch_data if url else None, + fetch_metadata=fetch_meta or None, + ) + + async def _fetch_slack_file(self, url: str, token: str) -> bytes: + """Download a file from a Slack ``url_private`` endpoint. + + Shared by :meth:`_create_attachment` (direct fetch closure) and + :meth:`rehydrate_attachment` (reconstructed closure after JSON + roundtrip). + """ + import httpx + + async with httpx.AsyncClient() as http: + resp = await http.get(url, headers={"Authorization": f"Bearer {token}"}) + resp.raise_for_status() + content_type = resp.headers.get("content-type", "") + if "text/html" in content_type: + raise RuntimeError( + "Failed to download file from Slack: received HTML login page. " + 'Ensure your Slack app has the "files:read" OAuth scope.' + ) + return resp.content + + def rehydrate_attachment(self, attachment: Attachment) -> Attachment: + """Reconstruct ``fetch_data`` on a deserialized Slack attachment. + + Matches the upstream TS implementation: looks up the download URL + (and optional ``teamId`` for multi-workspace installations) from + ``attachment.fetch_metadata``, and rebuilds a ``fetch_data`` closure + that resolves the workspace-specific bot token at call time. + + Returns the attachment unchanged when no URL is available. + """ + meta = attachment.fetch_metadata or {} + url = meta.get("url") or attachment.url + team_id = meta.get("teamId") + if not url: + return attachment + + adapter = self + + async def fetch_data() -> bytes: + if team_id: + installation = await adapter.get_installation(team_id) + if installation is None: + raise AuthenticationError( + "slack", + f"Installation not found for team {team_id}", + ) + token = installation.bot_token + else: + token = adapter._get_token() + return await adapter._fetch_slack_file(url, token) + + return Attachment( + type=attachment.type, + url=attachment.url, + name=attachment.name, + mime_type=attachment.mime_type, + size=attachment.size, + width=attachment.width, + height=attachment.height, + data=attachment.data, + fetch_data=fetch_data, + fetch_metadata=attachment.fetch_metadata, ) def _is_message_from_self(self, event: dict[str, Any]) -> bool: diff --git a/src/chat_sdk/adapters/teams/adapter.py b/src/chat_sdk/adapters/teams/adapter.py index 1f91b1c..400c82e 100644 --- a/src/chat_sdk/adapters/teams/adapter.py +++ b/src/chat_sdk/adapters/teams/adapter.py @@ -14,6 +14,7 @@ import json import os import re +from collections.abc import Awaitable, Callable from datetime import datetime, timezone from typing import Any, Literal, NoReturn @@ -582,11 +583,52 @@ def _create_attachment(self, att: dict[str, Any]) -> Attachment: elif content_type.startswith("audio/"): att_type = "audio" + url = att.get("contentUrl") return Attachment( type=att_type, - url=att.get("contentUrl"), + url=url, name=att.get("name"), mime_type=content_type or None, + fetch_metadata={"url": url} if url else None, + fetch_data=self._build_teams_fetch_data(url) if url else None, + ) + + def _build_teams_fetch_data(self, url: str) -> Callable[[], Awaitable[bytes]]: + """Build a lazy ``fetch_data`` closure for a Teams file URL.""" + + async def fetch_data() -> bytes: + import httpx + + async with httpx.AsyncClient() as http: + resp = await http.get(url) + resp.raise_for_status() + return resp.content + + return fetch_data + + def rehydrate_attachment(self, attachment: Attachment) -> Attachment: + """Reconstruct ``fetch_data`` on a deserialized Teams attachment. + + Teams uses public file URLs (signed by the Graph API), so all we + need to rebuild the download closure is the URL — either from + ``fetch_metadata["url"]`` or the attachment's top-level ``url``. + Returns the attachment unchanged when no URL is available. + """ + meta = attachment.fetch_metadata or {} + url = meta.get("url") or attachment.url + if not url: + return attachment + return Attachment( + type=attachment.type, + url=attachment.url, + name=attachment.name, + mime_type=attachment.mime_type, + size=attachment.size, + width=attachment.width, + height=attachment.height, + data=attachment.data, + fetch_data=self._build_teams_fetch_data(url), + fetch_metadata=attachment.fetch_metadata, ) def _is_message_from_self(self, activity: dict[str, Any]) -> bool: diff --git a/src/chat_sdk/adapters/telegram/adapter.py b/src/chat_sdk/adapters/telegram/adapter.py index 6721751..00548b8 100644 --- a/src/chat_sdk/adapters/telegram/adapter.py +++ b/src/chat_sdk/adapters/telegram/adapter.py @@ -1365,6 +1365,32 @@ def create_attachment( name=name, mime_type=mime_type, fetch_data=lambda _fid=file_id: self.download_file(_fid), + fetch_metadata={"fileId": file_id}, + ) + + def rehydrate_attachment(self, attachment: Attachment) -> Attachment: + """Reconstruct ``fetch_data`` on a deserialized Telegram attachment. + + Pulls ``fileId`` from ``fetch_metadata`` and rebuilds the lazy + ``download_file`` closure. Returns the attachment unchanged when + no file ID is present (e.g. a pre-serialized attachment that did + not originate from this adapter). + """ + meta = attachment.fetch_metadata or {} + file_id = meta.get("fileId") + if not file_id: + return attachment + return Attachment( + type=attachment.type, + url=attachment.url, + name=attachment.name, + mime_type=attachment.mime_type, + size=attachment.size, + width=attachment.width, + height=attachment.height, + data=attachment.data, + fetch_data=lambda _fid=file_id: self.download_file(_fid), + fetch_metadata=attachment.fetch_metadata, ) async def download_file(self, file_id: str) -> bytes: diff --git a/src/chat_sdk/adapters/whatsapp/adapter.py b/src/chat_sdk/adapters/whatsapp/adapter.py index 983bfb0..308671c 100644 --- a/src/chat_sdk/adapters/whatsapp/adapter.py +++ b/src/chat_sdk/adapters/whatsapp/adapter.py @@ -633,6 +633,31 @@ def _build_media_attachment( mime_type=mime_type, name=name, fetch_data=lambda mid=media_id: self.download_media(mid), + fetch_metadata={"mediaId": media_id}, + ) + + def rehydrate_attachment(self, attachment: Attachment) -> Attachment: + """Reconstruct ``fetch_data`` on a deserialized WhatsApp attachment. + + Pulls ``mediaId`` from ``fetch_metadata`` and rebuilds the lazy + ``download_media`` closure. Returns the attachment unchanged when + no media ID is present. + """ + meta = attachment.fetch_metadata or {} + media_id = meta.get("mediaId") + if not media_id: + return attachment + return Attachment( + type=attachment.type, + url=attachment.url, + name=attachment.name, + mime_type=attachment.mime_type, + size=attachment.size, + width=attachment.width, + height=attachment.height, + data=attachment.data, + fetch_data=lambda mid=media_id: self.download_media(mid), + fetch_metadata=attachment.fetch_metadata, ) async def download_media(self, media_id: str) -> bytes: diff --git a/src/chat_sdk/chat.py b/src/chat_sdk/chat.py index d4a7b30..2cce20e 100644 --- a/src/chat_sdk/chat.py +++ b/src/chat_sdk/chat.py @@ -36,6 +36,7 @@ AppHomeOpenedEvent, AssistantContextChangedEvent, AssistantThreadStartedEvent, + Attachment, Author, Channel, ChannelVisibility, @@ -1890,7 +1891,7 @@ async def _debounce_loop( if entry is None: break - msg = self._rehydrate_message(entry.message) + msg = self._rehydrate_message(entry.message, adapter) now = int(datetime.now(tz=timezone.utc).timestamp() * 1000) if now > entry.expires_at: self._logger.info("message-expired", {"thread_id": thread_id, "message_id": msg.id}) @@ -1920,7 +1921,7 @@ async def _drain_queue( entry = await self._state_adapter.dequeue(lock_key) if entry is None: break - msg = self._rehydrate_message(entry.message) + msg = self._rehydrate_message(entry.message, adapter) now = int(datetime.now(tz=timezone.utc).timestamp() * 1000) if now <= entry.expires_at: pending.append((msg, entry.expires_at)) @@ -2124,52 +2125,75 @@ def _detect_mention(self, adapter: Adapter, message: Message) -> bool: # Message rehydration # ======================================================================== - def _rehydrate_message(self, raw: Any) -> Message: - """Reconstruct a proper Message from a dequeued entry (may be plain dict).""" + def _rehydrate_message(self, raw: Any, adapter: Adapter | None = None) -> Message: + """Reconstruct a proper Message from a dequeued entry (may be plain dict). + + After a JSON roundtrip through the state adapter (queue/debounce + strategies), the message is a plain dict and any ``fetch_data`` + callables on attachments have been stripped. If ``adapter`` + exposes a ``rehydrate_attachment`` hook we call it on every + attachment that lost its ``fetch_data`` closure so downstream + handlers can still download bytes. + """ + # Matches upstream: if the entry is already a Message instance, its + # fetch_data closures never went through a JSON roundtrip, so we + # return it untouched — no rehydrate pass. if isinstance(raw, Message): return raw if isinstance(raw, dict): if raw.get("_type") == "chat:Message": - return _message_from_json(raw) - # Fallback: plain dict - metadata_raw = raw.get("metadata", {}) - date_sent = metadata_raw.get("date_sent") - if isinstance(date_sent, str): - date_sent = _parse_iso(date_sent) - elif not isinstance(date_sent, datetime): - date_sent = datetime.now(tz=timezone.utc) - - edited_at = metadata_raw.get("edited_at") - if isinstance(edited_at, str): - edited_at = _parse_iso(edited_at) - - author_raw = raw.get("author", {}) - return Message( - id=raw.get("id", ""), - thread_id=raw.get("thread_id", ""), - text=raw.get("text", ""), - formatted=raw.get("formatted", {"type": "root", "children": []}), - raw=raw.get("raw"), - author=Author( - user_id=author_raw.get("user_id", ""), - user_name=author_raw.get("user_name", ""), - full_name=author_raw.get("full_name", ""), - is_bot=author_raw.get("is_bot", False), - is_me=author_raw.get("is_me", False), - ), - metadata=MessageMetadata( - date_sent=date_sent, - edited=metadata_raw.get("edited", False), - edited_at=edited_at, - ), - attachments=raw.get("attachments", []), - is_mention=raw.get("is_mention"), - links=raw.get("links", []), - ) + msg = _message_from_json(raw) + else: + # Fallback: plain dict + metadata_raw = raw.get("metadata", {}) + date_sent = metadata_raw.get("date_sent") + if isinstance(date_sent, str): + date_sent = _parse_iso(date_sent) + elif not isinstance(date_sent, datetime): + date_sent = datetime.now(tz=timezone.utc) + + edited_at = metadata_raw.get("edited_at") + if isinstance(edited_at, str): + edited_at = _parse_iso(edited_at) + + author_raw = raw.get("author", {}) + msg = Message( + id=raw.get("id", ""), + thread_id=raw.get("thread_id", ""), + text=raw.get("text", ""), + formatted=raw.get("formatted", {"type": "root", "children": []}), + raw=raw.get("raw"), + author=Author( + user_id=author_raw.get("user_id", ""), + user_name=author_raw.get("user_name", ""), + full_name=author_raw.get("full_name", ""), + is_bot=author_raw.get("is_bot", False), + is_me=author_raw.get("is_me", False), + ), + metadata=MessageMetadata( + date_sent=date_sent, + edited=metadata_raw.get("edited", False), + edited_at=edited_at, + ), + attachments=_coerce_attachments(raw.get("attachments", [])), + is_mention=raw.get("is_mention"), + links=raw.get("links", []), + ) + else: + # Last resort: assume it's already a Message-like object + return raw # type: ignore[return-value] + + # Apply the adapter's rehydrate_attachment hook (if provided) to any + # attachment that lost its fetch_data closure during serialization. + # Matches TS: `adapter?.rehydrateAttachment?.(att)` — duck-typed so + # adapters that do not declare the hook (e.g. bare MockAdapter) are + # treated as no-ops and the attachment is left untouched. + rehydrate = getattr(adapter, "rehydrate_attachment", None) if adapter else None + if callable(rehydrate) and msg.attachments: + msg.attachments = [att if att.fetch_data is not None else rehydrate(att) for att in msg.attachments] - # Last resort: assume it's already a Message-like object - return raw # type: ignore[return-value] + return msg # ======================================================================== # Handler execution @@ -2212,6 +2236,36 @@ async def _invoke_handler(handler: Any, /, *args: Any, **kwargs: Any) -> Any: # --------------------------------------------------------------------------- +def _coerce_attachments(raw: Any) -> list[Attachment]: + """Convert a list of attachment dicts (post JSON roundtrip) to ``Attachment`` instances. + + ``Message.from_json()`` already handles this when the outer dict uses the + ``_type: "chat:Message"`` envelope, but the plain-dict fallback in + ``_rehydrate_message`` may receive raw dicts (e.g. in-memory state that + bypassed ``to_json``). Idempotent: ``Attachment`` instances pass through. + """ + if not raw: + return [] + out: list[Attachment] = [] + for att in raw: + if isinstance(att, Attachment): + out.append(att) + elif isinstance(att, dict): + out.append( + Attachment( + type=att.get("type", "file"), + url=att.get("url"), + name=att.get("name"), + mime_type=att.get("mimeType") or att.get("mime_type"), + size=att.get("size"), + width=att.get("width"), + height=att.get("height"), + fetch_metadata=att.get("fetchMetadata") or att.get("fetch_metadata"), + ) + ) + return out + + def _message_from_json(data: dict[str, Any]) -> Message: author_raw = data.get("author", {}) metadata_raw = data.get("metadata", {}) @@ -2244,7 +2298,7 @@ def _message_from_json(data: dict[str, Any]) -> Message: edited=metadata_raw.get("edited", False), edited_at=edited_at, ), - attachments=data.get("attachments", []), + attachments=_coerce_attachments(data.get("attachments", [])), is_mention=data.get("isMention") if "isMention" in data else data.get("is_mention"), links=data.get("links", []), ) diff --git a/src/chat_sdk/types.py b/src/chat_sdk/types.py index 5c4c6f1..312f172 100644 --- a/src/chat_sdk/types.py +++ b/src/chat_sdk/types.py @@ -249,7 +249,15 @@ class MessageMetadata: @dataclass class Attachment: - """File attachment.""" + """File attachment. + + ``fetch_metadata`` is a serializable dict of adapter-specific identifiers + (e.g. Slack URL + team ID, Telegram file_id, WhatsApp media_id) that + survives JSON roundtrips and lets + :meth:`Adapter.rehydrate_attachment` rebuild the ``fetch_data`` download + closure after the queue/debounce path drops callables during + serialization. + """ type: Literal["image", "file", "video", "audio"] url: str | None = None @@ -260,6 +268,7 @@ class Attachment: height: int | None = None data: bytes | None = None fetch_data: Callable[[], Awaitable[bytes]] | None = None + fetch_metadata: dict[str, str] | None = None @dataclass @@ -302,7 +311,12 @@ class SerializedMessageMetadata(TypedDict, total=False): class SerializedAttachment(TypedDict, total=False): - """Serialized attachment (non-serializable fields omitted).""" + """Serialized attachment (non-serializable fields omitted). + + ``fetch_metadata`` is preserved so ``Adapter.rehydrate_attachment`` can + reconstruct the download closure after a JSON roundtrip through the + state adapter (e.g. queue/debounce concurrency). + """ type: Literal["image", "file", "video", "audio"] url: str @@ -311,6 +325,7 @@ class SerializedAttachment(TypedDict, total=False): size: int width: int height: int + fetch_metadata: dict[str, str] class SerializedLinkPreview(TypedDict, total=False): @@ -414,6 +429,11 @@ def to_json(self) -> dict[str, Any]: "size": att.size, "width": att.width, "height": att.height, + # ``fetchMetadata`` carries adapter-specific identifiers + # (URL, team id, file id, etc.) used by + # ``Adapter.rehydrate_attachment`` to rebuild the + # download closure after a JSON roundtrip. + "fetchMetadata": att.fetch_metadata, } ) for att in self.attachments @@ -499,6 +519,7 @@ def from_json(cls, data: dict[str, Any] | Message) -> Message: size=att.get("size"), width=att.get("width"), height=att.get("height"), + fetch_metadata=att.get("fetchMetadata") or att.get("fetch_metadata"), ) for att in attachments_data ], @@ -570,6 +591,7 @@ def from_json_compat(cls, data: dict[str, Any]) -> Message: size=att.get("size"), width=att.get("width"), height=att.get("height"), + fetch_metadata=att.get("fetch_metadata") or att.get("fetchMetadata"), ) for att in attachments_data ], @@ -1306,6 +1328,17 @@ async def disconnect(self) -> None: """Cleanup hook called when the Chat instance is shut down.""" raise ChatNotImplementedError(self.name, "disconnect") + def rehydrate_attachment(self, attachment: Attachment) -> Attachment: + """Reconstruct ``fetch_data`` on an attachment after deserialization. + + Called by :class:`~chat_sdk.chat.Chat` during message rehydration in + the queue/debounce concurrency paths. The default implementation is a + no-op (returns the attachment unchanged) — adapters that support file + downloads should override it to rebuild the platform-specific + download closure from ``attachment.fetch_metadata``. + """ + return attachment + # ============================================================================= # Chat Configuration diff --git a/tests/test_chat_faithful.py b/tests/test_chat_faithful.py index d61e1e1..17a87f3 100644 --- a/tests/test_chat_faithful.py +++ b/tests/test_chat_faithful.py @@ -28,12 +28,14 @@ ) from chat_sdk.types import ( ActionEvent, + Attachment, Author, ChatConfig, ConcurrencyConfig, EmojiValue, MessageContext, ModalSubmitEvent, + QueueEntry, ReactionEvent, SlashCommandEvent, ) @@ -2342,6 +2344,220 @@ async def handler(thread, message, context=None): assert received_messages[2] == "skipped:!help first" +# ============================================================================ +# 19b. concurrency: queue attachment rehydration (tests 84a-84c) +# ============================================================================ + + +def _install_json_roundtrip_enqueue(state: MockStateAdapter) -> None: + """Simulate a real state adapter by JSON-roundtripping queue entries. + + Upstream ``chat.test.ts`` uses ``vi.mocked(state.enqueue).mockImplementation`` + to wrap the original ``enqueue`` and pass its argument through + ``JSON.parse(JSON.stringify(entry))`` before persisting. The effect is + that attachment ``fetch_data`` callables (and any other non-serializable + fields) are stripped before the entry lands in the queue, which is what + the rehydrate_attachment hook exists to compensate for. + + The Python equivalent swaps ``state.enqueue`` for a wrapper that + serializes the inner :class:`Message` via its ``to_json`` method so the + dequeued entry carries a plain-dict message (matching what Redis / + Postgres backends produce after a real JSON roundtrip). + """ + original_enqueue = state.enqueue + + async def enqueue(thread_id: str, entry: QueueEntry, max_size: int) -> int: + import json + + serialized_msg = json.loads(json.dumps(entry.message.to_json())) + serialized_entry = QueueEntry( + enqueued_at=entry.enqueued_at, + expires_at=entry.expires_at, + message=serialized_msg, # type: ignore[arg-type] + ) + return await original_enqueue(thread_id, serialized_entry, max_size) + + state.enqueue = enqueue # type: ignore[method-assign] + + +class TestConcurrencyQueueAttachmentRehydration: + """Faithful port of TS ``describe("concurrency: queue attachment rehydration")``.""" + + # TS: "should call rehydrateAttachment on deserialized attachments missing fetchData" + async def test_should_call_rehydrateattachment_on_deserialized_attachments_missing_fetchdata( + self, + ): + state = create_mock_state() + _install_json_roundtrip_enqueue(state) + adapter = create_mock_adapter("slack") + + rehydrate_calls: list[Attachment] = [] + + async def mock_fetch_data() -> bytes: + return b"data" + + def rehydrate(att: Attachment) -> Attachment: + rehydrate_calls.append(att) + return Attachment( + type=att.type, + url=att.url, + name=att.name, + mime_type=att.mime_type, + size=att.size, + width=att.width, + height=att.height, + fetch_metadata=att.fetch_metadata, + fetch_data=mock_fetch_data, + ) + + adapter.rehydrate_attachment = rehydrate # type: ignore[attr-defined] + + chat, _, _ = await _init_chat(adapter=adapter, state=state, concurrency="queue") + + received_attachments: list[list[Attachment]] = [] + + @chat.on_mention + async def handler(thread, message, context=None): + received_attachments.append(message.attachments) + + # Pre-acquire the lock so the first message is enqueued (and JSON-serialized) + await state.acquire_lock("slack:C123:1234.5678", 30000) + + async def original_fetch() -> bytes: + return b"original" + + msg = create_test_message( + "msg-att-1", + "Hey @slack-bot file", + attachments=[ + Attachment( + type="file", + url="https://example.com/f.pdf", + name="f.pdf", + fetch_metadata={"url": "https://example.com/f.pdf"}, + fetch_data=original_fetch, + ), + ], + ) + + await chat.handle_incoming_message(adapter, "slack:C123:1234.5678", msg) + + # Release the lock and drive the drain via a fresh message. + await state.force_release_lock("slack:C123:1234.5678") + trigger = create_test_message("msg-att-2", "Hey @slack-bot trigger") + await chat.handle_incoming_message(adapter, "slack:C123:1234.5678", trigger) + + # rehydrate_attachment should have been called for the queued message + assert len(rehydrate_calls) == 1 + assert rehydrate_calls[0].fetch_metadata == {"url": "https://example.com/f.pdf"} + assert rehydrate_calls[0].type == "file" + + # Find the handler call that received the originally-queued attachment. + queued_attachments = next( + (atts for atts in received_attachments if atts and atts[0].name == "f.pdf"), + None, + ) + assert queued_attachments is not None + assert queued_attachments[0].fetch_data is mock_fetch_data + + # TS: "should skip rehydration for attachments that already have fetchData" + async def test_should_skip_rehydration_for_attachments_that_already_have_fetchdata(self): + # No JSON roundtrip — the Message instance survives with fetch_data intact. + state = create_mock_state() + adapter = create_mock_adapter("slack") + + rehydrate_calls: list[Attachment] = [] + + def rehydrate(att: Attachment) -> Attachment: + rehydrate_calls.append(att) + return att + + adapter.rehydrate_attachment = rehydrate # type: ignore[attr-defined] + + chat, _, _ = await _init_chat(adapter=adapter, state=state, concurrency="queue") + + received_attachments: list[list[Attachment]] = [] + + @chat.on_mention + async def handler(thread, message, context=None): + received_attachments.append(message.attachments) + + async def original_fetch() -> bytes: + return b"original" + + await state.acquire_lock("slack:C123:1234.5678", 30000) + + msg = create_test_message( + "msg-skip-1", + "Hey @slack-bot file", + attachments=[ + Attachment( + type="file", + url="https://example.com/f.pdf", + fetch_data=original_fetch, + ), + ], + ) + + await chat.handle_incoming_message(adapter, "slack:C123:1234.5678", msg) + + await state.force_release_lock("slack:C123:1234.5678") + trigger = create_test_message("msg-skip-2", "Hey @slack-bot trigger") + await chat.handle_incoming_message(adapter, "slack:C123:1234.5678", trigger) + + # fetch_data was already present — rehydrate_attachment must NOT have been called + assert rehydrate_calls == [] + + # TS: "should leave attachments unchanged when adapter has no rehydrateAttachment" + async def test_should_leave_attachments_unchanged_when_adapter_has_no_rehydrateattachment( + self, + ): + state = create_mock_state() + _install_json_roundtrip_enqueue(state) + adapter = create_mock_adapter("slack") # no rehydrate_attachment attribute + + chat, _, _ = await _init_chat(adapter=adapter, state=state, concurrency="queue") + + received_attachments: list[list[Attachment]] = [] + + @chat.on_mention + async def handler(thread, message, context=None): + received_attachments.append(message.attachments) + + async def original_fetch() -> bytes: + return b"data" + + await state.acquire_lock("slack:C123:1234.5678", 30000) + + msg = create_test_message( + "msg-noop-1", + "Hey @slack-bot file", + attachments=[ + Attachment( + type="file", + url="https://example.com/f.pdf", + fetch_metadata={"url": "https://example.com/f.pdf"}, + fetch_data=original_fetch, + ), + ], + ) + + await chat.handle_incoming_message(adapter, "slack:C123:1234.5678", msg) + + await state.force_release_lock("slack:C123:1234.5678") + trigger = create_test_message("msg-noop-2", "Hey @slack-bot trigger") + await chat.handle_incoming_message(adapter, "slack:C123:1234.5678", trigger) + + # Attachment should still have fetch_metadata but no fetch_data (lost in JSON roundtrip) + queued_attachments = next( + (atts for atts in received_attachments if atts and atts[0].url == "https://example.com/f.pdf"), + None, + ) + assert queued_attachments is not None + assert queued_attachments[0].fetch_metadata == {"url": "https://example.com/f.pdf"} + assert queued_attachments[0].fetch_data is None + + # ============================================================================ # 20. concurrency: debounce (tests 84-85) # ============================================================================ diff --git a/tests/test_google_chat_adapter.py b/tests/test_google_chat_adapter.py index ced25b1..80a0d8b 100644 --- a/tests/test_google_chat_adapter.py +++ b/tests/test_google_chat_adapter.py @@ -202,3 +202,46 @@ def test_adapter_properties(self): def test_custom_user_name(self): adapter = _make_adapter(user_name="mybot") assert adapter.user_name == "mybot" + + +# --------------------------------------------------------------------------- +# rehydrate_attachment +# --------------------------------------------------------------------------- + + +class TestRehydrateAttachment: + """Cover ``GoogleChatAdapter.rehydrate_attachment``.""" + + def test_rehydrates_from_resource_name(self): + from chat_sdk.types import Attachment + + adapter = _make_adapter() + attachment = Attachment( + type="image", + fetch_metadata={"resourceName": "spaces/ABC/messages/X/attachments/Y"}, + ) + rehydrated = adapter.rehydrate_attachment(attachment) + assert rehydrated.fetch_data is not None + assert rehydrated.fetch_metadata == { + "resourceName": "spaces/ABC/messages/X/attachments/Y", + } + + def test_rehydrates_from_url_when_no_resource_name(self): + from chat_sdk.types import Attachment + + adapter = _make_adapter() + attachment = Attachment( + type="file", + url="https://chat.googleapis.com/v1/media/X?alt=media", + fetch_metadata={"url": "https://chat.googleapis.com/v1/media/X?alt=media"}, + ) + rehydrated = adapter.rehydrate_attachment(attachment) + assert rehydrated.fetch_data is not None + + def test_returns_unchanged_when_no_metadata(self): + from chat_sdk.types import Attachment + + adapter = _make_adapter() + attachment = Attachment(type="file", name="local.bin") + rehydrated = adapter.rehydrate_attachment(attachment) + assert rehydrated is attachment diff --git a/tests/test_slack_webhook.py b/tests/test_slack_webhook.py index 22caf71..4cbf351 100644 --- a/tests/test_slack_webhook.py +++ b/tests/test_slack_webhook.py @@ -510,6 +510,100 @@ def make_event(mimetype: str) -> dict: assert adapter.parse_message(make_event("audio/mpeg")).attachments[0].type == "audio" assert adapter.parse_message(make_event("application/pdf")).attachments[0].type == "file" + def test_attachment_captures_team_id_in_fetch_metadata(self): + """The team_id from the event is stored on fetch_metadata for later rehydration.""" + adapter = _make_adapter(bot_user_id="U_BOT") + event = { + "type": "message", + "user": "U123", + "channel": "C456", + "text": "", + "ts": "1234567890.123456", + "team": "T_TEAM_42", + "files": [ + { + "id": "F123", + "mimetype": "image/png", + "url_private": "https://files.slack.com/img.png", + } + ], + } + msg = adapter.parse_message(event) + assert msg.attachments[0].fetch_metadata == { + "url": "https://files.slack.com/img.png", + "teamId": "T_TEAM_42", + } + + +# --------------------------------------------------------------------------- +# rehydrate_attachment (port of TS describe("rehydrateAttachment")) +# --------------------------------------------------------------------------- + + +class TestRehydrateAttachment: + """Port of TS ``describe("rehydrateAttachment")`` in adapter-slack/src/index.test.ts.""" + + # TS: "should resolve token from installation when teamId is present" + @pytest.mark.asyncio + async def test_should_resolve_token_from_installation_when_teamid_is_present(self): + from chat_sdk.types import Attachment + + adapter = _make_adapter( + signing_secret="test-secret", + bot_token=None, + client_id="client-id", + client_secret="client-secret", + ) + state = _make_mock_state() + await adapter.initialize(_make_mock_chat(state)) + + await adapter.set_installation( + "T_MULTI_1", + SlackInstallation( + bot_token="xoxb-multi-workspace-token", + bot_user_id="U_BOT_MULTI", + ), + ) + + rehydrated = adapter.rehydrate_attachment( + Attachment( + type="image", + url="https://files.slack.com/img.png", + fetch_metadata={ + "url": "https://files.slack.com/img.png", + "teamId": "T_MULTI_1", + }, + ) + ) + + assert rehydrated.fetch_data is not None + + # TS: "should fall back to getToken when no teamId in fetchMetadata" + def test_should_fall_back_to_gettoken_when_no_teamid_in_fetchmetadata(self): + from chat_sdk.types import Attachment + + adapter = _make_adapter(bot_token="xoxb-single") + rehydrated = adapter.rehydrate_attachment( + Attachment( + type="image", + url="https://files.slack.com/img.png", + fetch_metadata={"url": "https://files.slack.com/img.png"}, + ) + ) + assert rehydrated.fetch_data is not None + + # TS: "should return attachment unchanged when no url" + def test_should_return_attachment_unchanged_when_no_url(self): + from chat_sdk.types import Attachment + + adapter = _make_adapter(bot_token="xoxb-test") + attachment = Attachment(type="file", name="test.bin") + rehydrated = adapter.rehydrate_attachment(attachment) + + assert rehydrated.fetch_data is None + # Upstream asserts `toBe(attachment)` — identical object. + assert rehydrated is attachment + # --------------------------------------------------------------------------- # Edge cases diff --git a/tests/test_teams_adapter.py b/tests/test_teams_adapter.py index 0ffa0d5..ddb410d 100644 --- a/tests/test_teams_adapter.py +++ b/tests/test_teams_adapter.py @@ -327,6 +327,62 @@ def test_edited_false_for_new(self): assert msg.metadata.edited is False assert msg.metadata.date_sent == datetime(2024, 6, 1, 12, 0, 0, tzinfo=timezone.utc) + def test_attachment_stores_url_in_fetch_metadata(self): + """Teams fetch_metadata captures the URL so rehydrate_attachment can rebuild fetch_data.""" + adapter = _make_adapter(app_id="test-app") + activity = { + "type": "message", + "id": "msg-108", + "text": "test", + "from": {"id": "user-1", "name": "Alice"}, + "conversation": {"id": "19:abc@thread.tacv2"}, + "serviceUrl": "https://smba.trafficmanager.net/teams/", + "attachments": [ + {"contentType": "image/jpeg", "contentUrl": "https://x.com/photo.jpg", "name": "photo.jpg"}, + ], + } + msg = adapter.parse_message(activity) + assert msg.attachments[0].fetch_metadata == {"url": "https://x.com/photo.jpg"} + assert msg.attachments[0].fetch_data is not None + + +# --------------------------------------------------------------------------- +# rehydrate_attachment +# --------------------------------------------------------------------------- + + +class TestRehydrateAttachment: + def test_rehydrates_fetch_data_from_fetch_metadata_url(self): + """After JSON roundtrip (fetch_data stripped), the URL in fetch_metadata restores the closure.""" + from chat_sdk.types import Attachment + + adapter = _make_adapter(app_id="test-app") + attachment = Attachment( + type="image", + url="https://x.com/photo.jpg", + fetch_metadata={"url": "https://x.com/photo.jpg"}, + ) + rehydrated = adapter.rehydrate_attachment(attachment) + assert rehydrated.fetch_data is not None + + def test_rehydrate_falls_back_to_attachment_url_when_fetch_metadata_missing(self): + """When fetch_metadata is absent, rehydrate falls back to the attachment's top-level url.""" + from chat_sdk.types import Attachment + + adapter = _make_adapter(app_id="test-app") + attachment = Attachment(type="file", url="https://x.com/doc.pdf") + rehydrated = adapter.rehydrate_attachment(attachment) + assert rehydrated.fetch_data is not None + + def test_rehydrate_returns_unchanged_when_no_url(self): + """Without any URL, rehydrate returns the attachment unchanged.""" + from chat_sdk.types import Attachment + + adapter = _make_adapter(app_id="test-app") + attachment = Attachment(type="file", name="local.bin") + rehydrated = adapter.rehydrate_attachment(attachment) + assert rehydrated is attachment + # --------------------------------------------------------------------------- # normalizeMentions (via parseMessage) diff --git a/tests/test_telegram_adapter.py b/tests/test_telegram_adapter.py index 5ad20d7..5653f64 100644 --- a/tests/test_telegram_adapter.py +++ b/tests/test_telegram_adapter.py @@ -368,3 +368,40 @@ def test_adapter_class_properties(self): assert adapter.bot_user_id is None # not yet initialized assert adapter.is_polling is False assert adapter.runtime_mode == "webhook" + + +# --------------------------------------------------------------------------- +# rehydrate_attachment +# --------------------------------------------------------------------------- + + +class TestRehydrateAttachment: + """Port of the Telegram adapter's ``rehydrateAttachment`` behavior.""" + + def test_rehydrate_attachment_uses_file_id_from_fetch_metadata(self): + from chat_sdk.types import Attachment + + adapter = _make_adapter() + attachment = Attachment( + type="image", + fetch_metadata={"fileId": "AgACAgIAAxkB"}, + ) + rehydrated = adapter.rehydrate_attachment(attachment) + assert rehydrated.fetch_data is not None + # fetch_metadata is preserved so the attachment stays serializable/rehydratable again. + assert rehydrated.fetch_metadata == {"fileId": "AgACAgIAAxkB"} + + def test_rehydrate_returns_unchanged_when_no_file_id(self): + from chat_sdk.types import Attachment + + adapter = _make_adapter() + attachment = Attachment(type="file", name="noop.bin") + rehydrated = adapter.rehydrate_attachment(attachment) + assert rehydrated is attachment + + def test_create_attachment_stores_file_id_in_fetch_metadata(self): + """Fresh attachments always carry a fileId so they survive JSON roundtrip.""" + adapter = _make_adapter() + att = adapter.create_attachment(type_="image", file_id="F_TEST_42", name="img.jpg") + assert att.fetch_metadata == {"fileId": "F_TEST_42"} + assert att.fetch_data is not None diff --git a/tests/test_whatsapp_webhook.py b/tests/test_whatsapp_webhook.py index a0ac04b..dfd25c4 100644 --- a/tests/test_whatsapp_webhook.py +++ b/tests/test_whatsapp_webhook.py @@ -687,3 +687,32 @@ async def test_delete_message_raises(self): adapter = _make_adapter() with pytest.raises(Exception, match="(?i)not support"): await adapter.delete_message("whatsapp:123456789:15551234567", "wamid.xxx") + + +# --------------------------------------------------------------------------- +# rehydrate_attachment +# --------------------------------------------------------------------------- + + +class TestRehydrateAttachment: + """Cover ``WhatsAppAdapter.rehydrate_attachment``.""" + + def test_rehydrates_fetch_data_from_media_id(self): + from chat_sdk.types import Attachment + + adapter = _make_adapter() + attachment = Attachment( + type="image", + fetch_metadata={"mediaId": "media-42"}, + ) + rehydrated = adapter.rehydrate_attachment(attachment) + assert rehydrated.fetch_data is not None + assert rehydrated.fetch_metadata == {"mediaId": "media-42"} + + def test_returns_unchanged_when_no_media_id(self): + from chat_sdk.types import Attachment + + adapter = _make_adapter() + attachment = Attachment(type="file", name="local.bin") + rehydrated = adapter.rehydrate_attachment(attachment) + assert rehydrated is attachment From 37a21c35ac23dcba0d21dbf86aa3cd3ceeb155c8 Mon Sep 17 00:00:00 2001 From: patrick-chinchill Date: Thu, 23 Apr 2026 18:48:41 -0700 Subject: [PATCH 2/4] fix(attachments): address review feedback + fix pyrefly CI failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - chat.py:2107 — type the rehydrate-attachment callable so the list comprehension narrows to list[Attachment]. Unblocks CI. - _coerce_attachments: replace `or` fallbacks with `is not None` (Port Rule #1 truthiness trap) - google_chat rehydrate_attachment: preserve resolved URL when reconstructing, drop truthiness fallback on meta["url"] - Harden telegram and whatsapp rehydrate tests to execute the async callback and verify download-method wiring (AsyncMock). Co-Authored-By: Claude Opus 4.7 (1M context) --- src/chat_sdk/adapters/google_chat/adapter.py | 7 ++++--- src/chat_sdk/chat.py | 18 +++++++++++++----- tests/test_telegram_adapter.py | 10 +++++++++- tests/test_whatsapp_webhook.py | 10 +++++++++- 4 files changed, 35 insertions(+), 10 deletions(-) diff --git a/src/chat_sdk/adapters/google_chat/adapter.py b/src/chat_sdk/adapters/google_chat/adapter.py index 2dc4524..f15b10b 100644 --- a/src/chat_sdk/adapters/google_chat/adapter.py +++ b/src/chat_sdk/adapters/google_chat/adapter.py @@ -2684,12 +2684,13 @@ def rehydrate_attachment(self, attachment: Attachment) -> Attachment: """ meta = attachment.fetch_metadata or {} resource_name = meta.get("resourceName") - url = meta.get("url") or attachment.url - if not (resource_name or url): + meta_url = meta.get("url") + url = meta_url if meta_url is not None else attachment.url + if resource_name is None and url is None: return attachment return Attachment( type=attachment.type, - url=attachment.url, + url=url, name=attachment.name, mime_type=attachment.mime_type, size=attachment.size, diff --git a/src/chat_sdk/chat.py b/src/chat_sdk/chat.py index 2cce20e..ea29d3d 100644 --- a/src/chat_sdk/chat.py +++ b/src/chat_sdk/chat.py @@ -2189,9 +2189,11 @@ def _rehydrate_message(self, raw: Any, adapter: Adapter | None = None) -> Messag # Matches TS: `adapter?.rehydrateAttachment?.(att)` — duck-typed so # adapters that do not declare the hook (e.g. bare MockAdapter) are # treated as no-ops and the attachment is left untouched. - rehydrate = getattr(adapter, "rehydrate_attachment", None) if adapter else None - if callable(rehydrate) and msg.attachments: - msg.attachments = [att if att.fetch_data is not None else rehydrate(att) for att in msg.attachments] + rehydrate_fn: Callable[[Attachment], Attachment] | None = ( + getattr(adapter, "rehydrate_attachment", None) if adapter else None + ) + if rehydrate_fn is not None and msg.attachments: + msg.attachments = [att if att.fetch_data is not None else rehydrate_fn(att) for att in msg.attachments] return msg @@ -2251,16 +2253,22 @@ def _coerce_attachments(raw: Any) -> list[Attachment]: if isinstance(att, Attachment): out.append(att) elif isinstance(att, dict): + mime_type = att.get("mimeType") + if mime_type is None: + mime_type = att.get("mime_type") + fetch_metadata = att.get("fetchMetadata") + if fetch_metadata is None: + fetch_metadata = att.get("fetch_metadata") out.append( Attachment( type=att.get("type", "file"), url=att.get("url"), name=att.get("name"), - mime_type=att.get("mimeType") or att.get("mime_type"), + mime_type=mime_type, size=att.get("size"), width=att.get("width"), height=att.get("height"), - fetch_metadata=att.get("fetchMetadata") or att.get("fetch_metadata"), + fetch_metadata=fetch_metadata, ) ) return out diff --git a/tests/test_telegram_adapter.py b/tests/test_telegram_adapter.py index 5653f64..69f10ea 100644 --- a/tests/test_telegram_adapter.py +++ b/tests/test_telegram_adapter.py @@ -378,16 +378,24 @@ def test_adapter_class_properties(self): class TestRehydrateAttachment: """Port of the Telegram adapter's ``rehydrateAttachment`` behavior.""" - def test_rehydrate_attachment_uses_file_id_from_fetch_metadata(self): + @pytest.mark.asyncio + async def test_rehydrate_attachment_uses_file_id_from_fetch_metadata(self): + from unittest.mock import AsyncMock + from chat_sdk.types import Attachment adapter = _make_adapter() + adapter.download_file = AsyncMock(return_value=b"ok") attachment = Attachment( type="image", fetch_metadata={"fileId": "AgACAgIAAxkB"}, ) rehydrated = adapter.rehydrate_attachment(attachment) assert rehydrated.fetch_data is not None + # Execute the rehydrated closure to verify it wires file_id correctly. + data = await rehydrated.fetch_data() + assert data == b"ok" + adapter.download_file.assert_awaited_once_with("AgACAgIAAxkB") # fetch_metadata is preserved so the attachment stays serializable/rehydratable again. assert rehydrated.fetch_metadata == {"fileId": "AgACAgIAAxkB"} diff --git a/tests/test_whatsapp_webhook.py b/tests/test_whatsapp_webhook.py index dfd25c4..93cfcf3 100644 --- a/tests/test_whatsapp_webhook.py +++ b/tests/test_whatsapp_webhook.py @@ -697,16 +697,24 @@ async def test_delete_message_raises(self): class TestRehydrateAttachment: """Cover ``WhatsAppAdapter.rehydrate_attachment``.""" - def test_rehydrates_fetch_data_from_media_id(self): + @pytest.mark.asyncio + async def test_rehydrates_fetch_data_from_media_id(self): + from unittest.mock import AsyncMock + from chat_sdk.types import Attachment adapter = _make_adapter() + adapter.download_media = AsyncMock(return_value=b"ok") attachment = Attachment( type="image", fetch_metadata={"mediaId": "media-42"}, ) rehydrated = adapter.rehydrate_attachment(attachment) assert rehydrated.fetch_data is not None + # Execute the rehydrated closure to verify it wires media_id correctly. + data = await rehydrated.fetch_data() + assert data == b"ok" + adapter.download_media.assert_awaited_once_with("media-42") assert rehydrated.fetch_metadata == {"mediaId": "media-42"} def test_returns_unchanged_when_no_media_id(self): From aa7bf48466ca99069cac1c52133257913912da82 Mon Sep 17 00:00:00 2001 From: patrick-chinchill Date: Thu, 23 Apr 2026 19:40:14 -0700 Subject: [PATCH 3/4] fix(attachments): SSRF guards + Message-instance rehydration + truthiness fallbacks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Second review pass on PR #67 (rehydrate_attachment). The previous fixup addressed pyrefly only — this commit resolves the remaining review feedback. SSRF guards (3 adapters) - Slack, Teams, Google Chat all rebuild fetch_data closures from serialized fetch_metadata["url"] in rehydrate_attachment. A tampered URL in persisted queue state could exfiltrate the workspace bot/OAuth token to an attacker-controlled host. Each adapter now validates the URL's scheme (https only) and host against a platform-specific allowlist before forwarding the auth header. Upstream TS does not validate; this is a Python-first divergence documented in docs/UPSTREAM_SYNC.md. - Slack: files.slack.com, slack.com, *.slack.com, *.slack-edge.com - Teams: Microsoft-owned hosts (graph.microsoft.com, smba.trafficmanager.net, *.sharepoint.com, *.botframework.com, *.office.com, attachments.office.net, …) - Google Chat: chat.googleapis.com, *.googleapis.com, *.googleusercontent.com, *.google.com Message-instance rehydration (P1) - Chat._rehydrate_message used to early-return on Message inputs, matching upstream TS's `raw instanceof Message` shortcut. That shortcut is safe in upstream because its state adapters return raw JSON dicts from dequeue. Our RedisStateAdapter / PostgresStateAdapter both upgrade the dequeued dict to `Message.from_json(...)` before returning, so the early return would skip rehydrate_attachment for every persistent-backend dequeue and leave fetch_data stripped. We now fall through and apply the rehydrate pass on Message inputs too (already-hydrated attachments with fetch_data are filtered out). Truthiness fallbacks (Port Rule #1) - telegram, whatsapp rehydrate_attachment and types.py dual-key fetch_metadata lookup now use explicit `is not None` instead of `or`, so an empty-dict fetch_metadata is preserved. Teams connection pooling - _build_teams_fetch_data used httpx.AsyncClient as a throwaway context manager per download. Refactored to use the shared aiohttp session (_get_http_session) that the rest of the adapter already goes through. Test hardening - test_slack_webhook.py and test_teams_adapter.py now stub the fetch path with AsyncMock, await rehydrated.fetch_data(), and assert the URL + token that were forwarded. Previously the tests only checked that `fetch_data is not None` — they would have passed even if rehydration returned a dummy closure. - New tests per adapter verify the SSRF guard rejects untrusted hosts and the allowlist accepts the intended Slack/Teams/GCP hosts. - New regression test in test_chat_faithful.py drives a Message- instance dequeue through the chat queue and asserts rehydrate_attachment still fires. Slack adapter connection pooling (deferred) - _fetch_slack_file still uses httpx.AsyncClient per call. The Slack adapter has no pooled aiohttp helper (only slack_sdk.AsyncWebClient for Slack API calls), so adding one is a larger refactor left for a follow-up. Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/UPSTREAM_SYNC.md | 2 + src/chat_sdk/adapters/google_chat/adapter.py | 43 ++++++- src/chat_sdk/adapters/slack/adapter.py | 49 +++++++- src/chat_sdk/adapters/teams/adapter.py | 76 ++++++++++-- src/chat_sdk/adapters/telegram/adapter.py | 2 +- src/chat_sdk/adapters/whatsapp/adapter.py | 2 +- src/chat_sdk/chat.py | 20 ++- src/chat_sdk/types.py | 8 +- tests/test_chat_faithful.py | 124 +++++++++++++++++++ tests/test_google_chat_adapter.py | 39 ++++++ tests/test_slack_webhook.py | 65 +++++++++- tests/test_teams_adapter.py | 95 +++++++++++++- 12 files changed, 492 insertions(+), 33 deletions(-) diff --git a/docs/UPSTREAM_SYNC.md b/docs/UPSTREAM_SYNC.md index 4db0bbf..526ace6 100644 --- a/docs/UPSTREAM_SYNC.md +++ b/docs/UPSTREAM_SYNC.md @@ -460,6 +460,8 @@ stay explicit instead of being rediscovered in code review. | `ConcurrencyConfig.max_concurrent` | Enforced via `asyncio.Semaphore` in the `"concurrent"` strategy path; rejects non-integer or `<= 0` values, and rejects any non-`None` `max_concurrent` paired with a non-`"concurrent"` strategy | Accepted into the config type with docstring "Default: Infinity" but never read (3 writes, 0 reads) | Silent correctness bug upstream — consumers setting `max_concurrent=N` with `strategy="concurrent"` reasonably expect an N-way bound on in-flight handlers. We honor the documented contract via a semaphore and fail-fast on misconfiguration so it's never silent. `max_concurrent=None` stays compatible with every strategy (unbounded default). | | Redis lock token format | `{token_prefix}_{ms}_{secrets.token_hex(16)}` — always 32 hex chars, CSPRNG-sourced | `ioredis_${Date.now()}_${Math.random().toString(36).substring(2, 15)}` — base36, ≤13 chars, **not** CSPRNG | Interop via `IoRedisStateAdapter(token_prefix="ioredis")` still works for lock-release (release/extend compare by full-string equality, and each runtime only releases what it issued), but the token byte-shape diverges. Intentional — CSPRNG should not be regressed to `Math.random()` for cosmetic byte-for-byte compatibility. | | `StreamingPlan.is_supported()` / `get_fallback_text()` | Raise `RuntimeError` to fail loudly if a generic posting path (e.g. `ChannelImpl.post`, `post_postable_object`) tries to consume a `StreamingPlan` as a normal `PostableObject` | Silently return `True` / `""` — `ChannelImpl.post` would route through `postPostableObject` and post an empty-string fallback | Prevents `StreamingPlan` being silently routed through non-stream-aware posting paths where upstream would post a blank message or attempt a wrong-shape `adapter.post_object("stream", ...)` call. Internal dispatch is guarded by the `kind == "stream"` short-circuit in `post_postable_object` / `Thread.post`; this also protects third-party code that duck-types PostableObjects. | +| `rehydrate_attachment` URL allowlist (Slack / Teams / Google Chat) | Validates the downloaded URL's scheme + host against a per-adapter allowlist inside the fetch closure; raises `ValidationError` on untrusted hosts before forwarding bearer tokens | No validation — `fetchData` blindly GETs `fetchMetadata.url` and forwards the workspace/bot token | SSRF + token-exfil risk upstream: after the 4.26 `rehydrateAttachment` hook lands, a crafted `fetchMetadata` in persisted state can redirect auth'd downloads to an arbitrary host. Python port enforces `CLAUDE.md`'s "Validate external URLs before requests (SSRF)" rule. Allowlist: Slack = `{files.slack.com, slack.com, *.slack.com, *.slack-edge.com}`; Teams = `{smba.trafficmanager.net, graph.microsoft.com, attachments.office.net, *.botframework.com, *.graph.microsoft.com, *.sharepoint.com, *.officeapps.live.com, *.office.com, *.office365.com, *.onedrive.com, *.microsoft.com}`; Google Chat = `{chat.googleapis.com, googleapis.com, *.googleapis.com, *.googleusercontent.com, *.google.com}`. | +| `_rehydrate_message` with `Message` input | Falls through to the `rehydrate_attachment` pass even when the dequeued entry is already a `Message` instance | Early-returns on `raw instanceof Message` before rehydration | The Python port's Redis + Postgres `dequeue()` upgrade raw JSON to `Message.from_json(...)` before returning (upstream's dequeue returns the raw JSON.parse'd dict). Upstream's `instanceof Message` shortcut therefore only fires for in-memory state, but ours would fire for persistent backends too, leaving `fetch_data` stripped forever. The rehydrate pass still skips any attachment that already has `fetch_data`, so in-memory callers pay no cost. | ### Platform-specific gaps diff --git a/src/chat_sdk/adapters/google_chat/adapter.py b/src/chat_sdk/adapters/google_chat/adapter.py index f15b10b..10503b3 100644 --- a/src/chat_sdk/adapters/google_chat/adapter.py +++ b/src/chat_sdk/adapters/google_chat/adapter.py @@ -19,6 +19,7 @@ from collections.abc import AsyncIterable, Awaitable, Callable from datetime import datetime, timezone from typing import Any, NoReturn +from urllib.parse import urlparse from chat_sdk.adapters.google_chat.cards import card_to_google_card from chat_sdk.adapters.google_chat.format_converter import GoogleChatFormatConverter @@ -2631,6 +2632,37 @@ def _create_attachment(self, att: dict[str, Any]) -> Attachment: fetch_metadata=fetch_meta or None, ) + @staticmethod + def _is_trusted_gchat_download_url(url: str) -> bool: + """Gate Google Chat attachment downloads to Google-owned hosts. + + After ``rehydrate_attachment`` reconstructs the fetch closure + from serialized ``fetch_metadata``, the URL may have been + tampered with in the state store. We refuse to forward the + OAuth access token unless the host is a known Google-owned host. + + This is a Python-first divergence: upstream Google Chat adapter + does not validate the URL. See ``docs/UPSTREAM_SYNC.md`` Known + Non-Parity. + """ + try: + parsed = urlparse(url) + except (ValueError, TypeError): + return False + if parsed.scheme != "https": + return False + host = (parsed.hostname or "").lower() + if not host: + return False + allowed_suffixes = ( + ".googleapis.com", + ".googleusercontent.com", + ".google.com", + ) + if host.endswith(allowed_suffixes): + return True + return host in {"chat.googleapis.com", "googleapis.com"} + def _build_gchat_fetch_data( self, resource_name: str | None, @@ -2656,8 +2688,15 @@ async def _fetch_data() -> bytes: ) return await response.read() - # Fallback to direct URL fetch (downloadUri) + # Fallback to direct URL fetch (downloadUri). Validate the + # host before forwarding the OAuth token — the URL may have + # been rebuilt from serialized metadata and tampered with. if url: + if not adapter._is_trusted_gchat_download_url(url): + raise ValidationError( + "gchat", + f"Refusing to fetch Google Chat file from untrusted URL: {url}", + ) token = await adapter._get_access_token() session = await adapter._get_http_session() async with session.get( @@ -2682,7 +2721,7 @@ def rehydrate_attachment(self, attachment: Attachment) -> Attachment: ``url`` (fallback) from ``fetch_metadata``. Returns the attachment unchanged when neither identifier is present. """ - meta = attachment.fetch_metadata or {} + meta = attachment.fetch_metadata if attachment.fetch_metadata is not None else {} resource_name = meta.get("resourceName") meta_url = meta.get("url") url = meta_url if meta_url is not None else attachment.url diff --git a/src/chat_sdk/adapters/slack/adapter.py b/src/chat_sdk/adapters/slack/adapter.py index b665c0f..e9f9b9e 100644 --- a/src/chat_sdk/adapters/slack/adapter.py +++ b/src/chat_sdk/adapters/slack/adapter.py @@ -23,7 +23,7 @@ from contextvars import ContextVar from datetime import datetime, timezone from typing import Any, NoReturn, cast -from urllib.parse import parse_qs +from urllib.parse import parse_qs, urlparse from chat_sdk.adapters.slack.cards import ( card_to_block_kit, @@ -1885,13 +1885,48 @@ async def fetch_data() -> bytes: fetch_metadata=fetch_meta or None, ) + @staticmethod + def _is_trusted_slack_download_url(url: str) -> bool: + """Gate Slack file downloads to known Slack-owned hosts. + + We refuse to forward ``Authorization: Bearer {token}`` to an + arbitrary URL. After ``rehydrate_attachment`` reconstructs the + fetch closure from serialized ``fetch_metadata``, that URL may + have been tampered with in the state store — a crafted value + could exfiltrate the workspace bot token. + + This is a Python-first divergence: upstream Slack adapter does not + validate the URL. See ``docs/UPSTREAM_SYNC.md`` Known Non-Parity. + """ + try: + parsed = urlparse(url) + except (ValueError, TypeError): + return False + if parsed.scheme != "https": + return False + host = (parsed.hostname or "").lower() + if not host: + return False + # Exact-match hosts + if host in {"files.slack.com", "slack.com"}: + return True + # Suffix match for Slack-owned subdomains + return host.endswith(".slack.com") or host.endswith(".slack-edge.com") + async def _fetch_slack_file(self, url: str, token: str) -> bytes: """Download a file from a Slack ``url_private`` endpoint. Shared by :meth:`_create_attachment` (direct fetch closure) and :meth:`rehydrate_attachment` (reconstructed closure after JSON - roundtrip). + roundtrip). Validates the host against the Slack allowlist + before forwarding the bot token (SSRF guard). """ + if not self._is_trusted_slack_download_url(url): + raise ValidationError( + "slack", + f"Refusing to fetch Slack file from untrusted URL: {url}", + ) + import httpx async with httpx.AsyncClient() as http: @@ -1913,10 +1948,14 @@ def rehydrate_attachment(self, attachment: Attachment) -> Attachment: ``attachment.fetch_metadata``, and rebuilds a ``fetch_data`` closure that resolves the workspace-specific bot token at call time. - Returns the attachment unchanged when no URL is available. + Returns the attachment unchanged when no URL is available. The + URL is re-validated inside the closure (by ``_fetch_slack_file``) + rather than here so that a trusted-at-serialize-time URL still + fails closed if the allowlist tightens later. """ - meta = attachment.fetch_metadata or {} - url = meta.get("url") or attachment.url + meta = attachment.fetch_metadata if attachment.fetch_metadata is not None else {} + meta_url = meta.get("url") + url = meta_url if meta_url is not None else attachment.url team_id = meta.get("teamId") if not url: return attachment diff --git a/src/chat_sdk/adapters/teams/adapter.py b/src/chat_sdk/adapters/teams/adapter.py index 400c82e..33bc85f 100644 --- a/src/chat_sdk/adapters/teams/adapter.py +++ b/src/chat_sdk/adapters/teams/adapter.py @@ -17,6 +17,7 @@ from collections.abc import Awaitable, Callable from datetime import datetime, timezone from typing import Any, Literal, NoReturn +from urllib.parse import urlparse from chat_sdk.adapters.teams.cards import card_to_adaptive_card from chat_sdk.adapters.teams.format_converter import TeamsFormatConverter @@ -593,16 +594,70 @@ def _create_attachment(self, att: dict[str, Any]) -> Attachment: fetch_data=self._build_teams_fetch_data(url) if url else None, ) + @staticmethod + def _is_trusted_teams_download_url(url: str) -> bool: + """Gate Teams file downloads to Microsoft-owned hosts. + + After ``rehydrate_attachment`` reconstructs the fetch closure + from serialized ``fetch_metadata``, the URL may have been + tampered with. We refuse to issue a direct GET unless the host + is a known Microsoft/Graph download host. + + This is a Python-first divergence: upstream Teams adapter does + not validate the URL. See ``docs/UPSTREAM_SYNC.md`` Known + Non-Parity. + """ + try: + parsed = urlparse(url) + except (ValueError, TypeError): + return False + if parsed.scheme != "https": + return False + host = (parsed.hostname or "").lower() + if not host: + return False + # Microsoft Graph / Bot Framework / SharePoint / Teams file hosts + allowed_suffixes = ( + ".botframework.com", + ".graph.microsoft.com", + ".sharepoint.com", + ".officeapps.live.com", + ".office.com", + ".office365.com", + ".onedrive.com", + ".microsoft.com", + ) + if host.endswith(allowed_suffixes): + return True + # Exact-match traffic-manager / Graph / Teams service hosts + return host in { + "smba.trafficmanager.net", + "graph.microsoft.com", + "attachments.office.net", + } + def _build_teams_fetch_data(self, url: str) -> Callable[[], Awaitable[bytes]]: - """Build a lazy ``fetch_data`` closure for a Teams file URL.""" + """Build a lazy ``fetch_data`` closure for a Teams file URL. - async def fetch_data() -> bytes: - import httpx + Uses the adapter's shared ``aiohttp.ClientSession`` (via + :meth:`_get_http_session`) so downloads reuse the connection + pool instead of constructing a throwaway client per request. + """ - async with httpx.AsyncClient() as http: - resp = await http.get(url) - resp.raise_for_status() - return resp.content + async def fetch_data() -> bytes: + if not self._is_trusted_teams_download_url(url): + raise ValidationError( + "teams", + f"Refusing to fetch Teams file from untrusted URL: {url}", + ) + session = await self._get_http_session() + async with session.get(url) as resp: + if resp.status >= 400: + raise NetworkError( + "teams", + f"Failed to fetch file: {resp.status}", + ) + return await resp.read() return fetch_data @@ -613,9 +668,12 @@ def rehydrate_attachment(self, attachment: Attachment) -> Attachment: need to rebuild the download closure is the URL — either from ``fetch_metadata["url"]`` or the attachment's top-level ``url``. Returns the attachment unchanged when no URL is available. + The URL host is validated inside the closure, so tampered URLs + raise at fetch time. """ - meta = attachment.fetch_metadata or {} - url = meta.get("url") or attachment.url + meta = attachment.fetch_metadata if attachment.fetch_metadata is not None else {} + meta_url = meta.get("url") + url = meta_url if meta_url is not None else attachment.url if not url: return attachment return Attachment( diff --git a/src/chat_sdk/adapters/telegram/adapter.py b/src/chat_sdk/adapters/telegram/adapter.py index 00548b8..48732ce 100644 --- a/src/chat_sdk/adapters/telegram/adapter.py +++ b/src/chat_sdk/adapters/telegram/adapter.py @@ -1376,7 +1376,7 @@ def rehydrate_attachment(self, attachment: Attachment) -> Attachment: no file ID is present (e.g. a pre-serialized attachment that did not originate from this adapter). """ - meta = attachment.fetch_metadata or {} + meta = attachment.fetch_metadata if attachment.fetch_metadata is not None else {} file_id = meta.get("fileId") if not file_id: return attachment diff --git a/src/chat_sdk/adapters/whatsapp/adapter.py b/src/chat_sdk/adapters/whatsapp/adapter.py index 308671c..9f2fcd2 100644 --- a/src/chat_sdk/adapters/whatsapp/adapter.py +++ b/src/chat_sdk/adapters/whatsapp/adapter.py @@ -643,7 +643,7 @@ def rehydrate_attachment(self, attachment: Attachment) -> Attachment: ``download_media`` closure. Returns the attachment unchanged when no media ID is present. """ - meta = attachment.fetch_metadata or {} + meta = attachment.fetch_metadata if attachment.fetch_metadata is not None else {} media_id = meta.get("mediaId") if not media_id: return attachment diff --git a/src/chat_sdk/chat.py b/src/chat_sdk/chat.py index ea29d3d..e5b8a51 100644 --- a/src/chat_sdk/chat.py +++ b/src/chat_sdk/chat.py @@ -2135,13 +2135,21 @@ def _rehydrate_message(self, raw: Any, adapter: Adapter | None = None) -> Messag attachment that lost its ``fetch_data`` closure so downstream handlers can still download bytes. """ - # Matches upstream: if the entry is already a Message instance, its - # fetch_data closures never went through a JSON roundtrip, so we - # return it untouched — no rehydrate pass. + # Diverges from upstream: upstream TS has + # ``if (raw instanceof Message) return raw;`` because its Redis / + # Postgres ``dequeue()`` returns the raw ``JSON.parse(value)`` — + # never a ``Message`` instance. Our Python port's Redis + + # Postgres ``dequeue()`` already upgrade the raw dict to + # ``Message.from_json(...)`` before returning (see + # ``state/redis.py`` and ``state/postgres.py``). An early return + # here would therefore skip ``rehydrate_attachment`` for every + # dequeued Message in a persistent backend, leaving + # ``fetch_data`` stripped. We fall through and apply the + # rehydrate pass; attachments that still have ``fetch_data`` + # (e.g. in-memory state) are filtered out below. if isinstance(raw, Message): - return raw - - if isinstance(raw, dict): + msg = raw + elif isinstance(raw, dict): if raw.get("_type") == "chat:Message": msg = _message_from_json(raw) else: diff --git a/src/chat_sdk/types.py b/src/chat_sdk/types.py index 312f172..e43243c 100644 --- a/src/chat_sdk/types.py +++ b/src/chat_sdk/types.py @@ -519,7 +519,9 @@ def from_json(cls, data: dict[str, Any] | Message) -> Message: size=att.get("size"), width=att.get("width"), height=att.get("height"), - fetch_metadata=att.get("fetchMetadata") or att.get("fetch_metadata"), + fetch_metadata=( + att.get("fetchMetadata") if att.get("fetchMetadata") is not None else att.get("fetch_metadata") + ), ) for att in attachments_data ], @@ -591,7 +593,9 @@ def from_json_compat(cls, data: dict[str, Any]) -> Message: size=att.get("size"), width=att.get("width"), height=att.get("height"), - fetch_metadata=att.get("fetch_metadata") or att.get("fetchMetadata"), + fetch_metadata=( + att.get("fetch_metadata") if att.get("fetch_metadata") is not None else att.get("fetchMetadata") + ), ) for att in attachments_data ], diff --git a/tests/test_chat_faithful.py b/tests/test_chat_faithful.py index 17a87f3..500970f 100644 --- a/tests/test_chat_faithful.py +++ b/tests/test_chat_faithful.py @@ -2380,6 +2380,54 @@ async def enqueue(thread_id: str, entry: QueueEntry, max_size: int) -> int: state.enqueue = enqueue # type: ignore[method-assign] +def _install_message_instance_roundtrip(state: MockStateAdapter) -> None: + """Simulate the Python Redis / Postgres ``dequeue`` upgrade path. + + Unlike upstream TS state adapters (which return ``JSON.parse(...)`` raw + dicts from ``dequeue``), our ``RedisStateAdapter.dequeue`` and + ``PostgresStateAdapter.dequeue`` call ``Message.from_json(...)`` on the + dequeued payload and return a proper :class:`Message` instance whose + attachments have been JSON-roundtripped (so ``fetch_data`` is stripped). + + This wrapper recreates that behavior on the in-memory state adapter so + we can assert that ``Chat._rehydrate_message`` still calls the + adapter's ``rehydrate_attachment`` hook on ``Message`` inputs. + """ + import json as _json + + original_enqueue = state.enqueue + original_dequeue = state.dequeue + + async def enqueue(thread_id: str, entry: QueueEntry, max_size: int) -> int: + serialized_msg = _json.loads(_json.dumps(entry.message.to_json())) + serialized_entry = QueueEntry( + enqueued_at=entry.enqueued_at, + expires_at=entry.expires_at, + message=serialized_msg, # type: ignore[arg-type] + ) + return await original_enqueue(thread_id, serialized_entry, max_size) + + async def dequeue(thread_id: str) -> QueueEntry | None: + from chat_sdk.types import Message + + entry = await original_dequeue(thread_id) + if entry is None: + return None + raw_msg = entry.message + # Upstream's dequeue leaves this as a raw dict. Ours rebuilds a + # Message instance — mimic that behavior here. + if isinstance(raw_msg, dict) and raw_msg.get("_type") == "chat:Message": + raw_msg = Message.from_json(raw_msg) + return QueueEntry( + enqueued_at=entry.enqueued_at, + expires_at=entry.expires_at, + message=raw_msg, + ) + + state.enqueue = enqueue # type: ignore[method-assign] + state.dequeue = dequeue # type: ignore[method-assign] + + class TestConcurrencyQueueAttachmentRehydration: """Faithful port of TS ``describe("concurrency: queue attachment rehydration")``.""" @@ -2557,6 +2605,82 @@ async def original_fetch() -> bytes: assert queued_attachments[0].fetch_metadata == {"url": "https://example.com/f.pdf"} assert queued_attachments[0].fetch_data is None + # Python-first regression: Redis / Postgres `dequeue` returns a Message + # instance (not a raw dict). Without our divergence from upstream's + # `raw instanceof Message -> return untouched` shortcut, rehydration + # would be silently skipped for every persistent-backend dequeue. + async def test_rehydrates_attachments_when_dequeue_returns_message_instance(self): + state = create_mock_state() + _install_message_instance_roundtrip(state) + adapter = create_mock_adapter("slack") + + rehydrate_calls: list[Attachment] = [] + + async def mock_fetch_data() -> bytes: + return b"rehydrated-bytes" + + def rehydrate(att: Attachment) -> Attachment: + rehydrate_calls.append(att) + return Attachment( + type=att.type, + url=att.url, + name=att.name, + mime_type=att.mime_type, + size=att.size, + width=att.width, + height=att.height, + fetch_metadata=att.fetch_metadata, + fetch_data=mock_fetch_data, + ) + + adapter.rehydrate_attachment = rehydrate # type: ignore[attr-defined] + + chat, _, _ = await _init_chat(adapter=adapter, state=state, concurrency="queue") + + received_attachments: list[list[Attachment]] = [] + + @chat.on_mention + async def handler(thread, message, context=None): + received_attachments.append(message.attachments) + + await state.acquire_lock("slack:C123:1234.5678", 30000) + + async def original_fetch() -> bytes: + return b"original" + + msg = create_test_message( + "msg-mi-1", + "Hey @slack-bot file", + attachments=[ + Attachment( + type="file", + url="https://example.com/f.pdf", + name="f.pdf", + fetch_metadata={"url": "https://example.com/f.pdf"}, + fetch_data=original_fetch, + ), + ], + ) + + await chat.handle_incoming_message(adapter, "slack:C123:1234.5678", msg) + + await state.force_release_lock("slack:C123:1234.5678") + trigger = create_test_message("msg-mi-2", "Hey @slack-bot trigger") + await chat.handle_incoming_message(adapter, "slack:C123:1234.5678", trigger) + + # The queued attachment went through a JSON roundtrip (fetch_data + # stripped) and was upgraded to a Message instance by the wrapper + # dequeue — rehydrate_attachment must still fire. + assert len(rehydrate_calls) == 1 + assert rehydrate_calls[0].name == "f.pdf" + + queued_attachments = next( + (atts for atts in received_attachments if atts and atts[0].name == "f.pdf"), + None, + ) + assert queued_attachments is not None + assert queued_attachments[0].fetch_data is mock_fetch_data + # ============================================================================ # 20. concurrency: debounce (tests 84-85) diff --git a/tests/test_google_chat_adapter.py b/tests/test_google_chat_adapter.py index 80a0d8b..c726af0 100644 --- a/tests/test_google_chat_adapter.py +++ b/tests/test_google_chat_adapter.py @@ -245,3 +245,42 @@ def test_returns_unchanged_when_no_metadata(self): attachment = Attachment(type="file", name="local.bin") rehydrated = adapter.rehydrate_attachment(attachment) assert rehydrated is attachment + + # Python-first divergence: SSRF guard on the downloadUri fallback path. + # The resource_name branch stays trusted (the URL is constructed by + # `_build_gchat_fetch_data` from a validated ``spaces/.../messages/...`` + # identifier, not from an attacker-controllable string). The `url` + # branch is the one that accepts serialized fetch_metadata. + @pytest.mark.asyncio + async def test_rehydrated_fetch_data_rejects_untrusted_url(self): + from unittest.mock import AsyncMock + + from chat_sdk.types import Attachment + + adapter = _make_adapter() + # These must never be awaited — validation rejects first. + adapter._get_access_token = AsyncMock() # type: ignore[method-assign] + adapter._get_http_session = AsyncMock() # type: ignore[method-assign] + + attachment = Attachment( + type="image", + url="https://attacker.example.com/pwn", + fetch_metadata={"url": "https://attacker.example.com/pwn"}, + ) + rehydrated = adapter.rehydrate_attachment(attachment) + assert rehydrated.fetch_data is not None + with pytest.raises(ValidationError): + await rehydrated.fetch_data() + adapter._get_access_token.assert_not_awaited() + adapter._get_http_session.assert_not_awaited() + + def test_is_trusted_gchat_download_url_allowlist(self): + assert GoogleChatAdapter._is_trusted_gchat_download_url("https://chat.googleapis.com/v1/media/X?alt=media") + assert GoogleChatAdapter._is_trusted_gchat_download_url("https://lh3.googleusercontent.com/photo.jpg") + assert GoogleChatAdapter._is_trusted_gchat_download_url("https://foo.google.com/x") + # Rejects non-HTTPS + assert not GoogleChatAdapter._is_trusted_gchat_download_url("http://chat.googleapis.com/x") + # Rejects arbitrary hosts + assert not GoogleChatAdapter._is_trusted_gchat_download_url("https://attacker.example/x") + # Rejects look-alikes + assert not GoogleChatAdapter._is_trusted_gchat_download_url("https://chat.googleapis.com.attacker.tld/x") diff --git a/tests/test_slack_webhook.py b/tests/test_slack_webhook.py index 4cbf351..60e5b2d 100644 --- a/tests/test_slack_webhook.py +++ b/tests/test_slack_webhook.py @@ -565,6 +565,10 @@ async def test_should_resolve_token_from_installation_when_teamid_is_present(sel ), ) + # Stub the network GET — assert the tenant token is forwarded. + fetch_mock = AsyncMock(return_value=b"workspace-bytes") + adapter._fetch_slack_file = fetch_mock # type: ignore[method-assign] + rehydrated = adapter.rehydrate_attachment( Attachment( type="image", @@ -577,12 +581,22 @@ async def test_should_resolve_token_from_installation_when_teamid_is_present(sel ) assert rehydrated.fetch_data is not None + result = await rehydrated.fetch_data() + assert result == b"workspace-bytes" + fetch_mock.assert_awaited_once_with( + "https://files.slack.com/img.png", + "xoxb-multi-workspace-token", + ) # TS: "should fall back to getToken when no teamId in fetchMetadata" - def test_should_fall_back_to_gettoken_when_no_teamid_in_fetchmetadata(self): + @pytest.mark.asyncio + async def test_should_fall_back_to_gettoken_when_no_teamid_in_fetchmetadata(self): from chat_sdk.types import Attachment adapter = _make_adapter(bot_token="xoxb-single") + fetch_mock = AsyncMock(return_value=b"single-bytes") + adapter._fetch_slack_file = fetch_mock # type: ignore[method-assign] + rehydrated = adapter.rehydrate_attachment( Attachment( type="image", @@ -591,6 +605,13 @@ def test_should_fall_back_to_gettoken_when_no_teamid_in_fetchmetadata(self): ) ) assert rehydrated.fetch_data is not None + result = await rehydrated.fetch_data() + assert result == b"single-bytes" + # Bot token (not a workspace-specific install token) is forwarded. + fetch_mock.assert_awaited_once_with( + "https://files.slack.com/img.png", + "xoxb-single", + ) # TS: "should return attachment unchanged when no url" def test_should_return_attachment_unchanged_when_no_url(self): @@ -604,6 +625,48 @@ def test_should_return_attachment_unchanged_when_no_url(self): # Upstream asserts `toBe(attachment)` — identical object. assert rehydrated is attachment + # Python-first divergence: reject SSRF vectors at fetch time even if the + # serialized attachment appeared valid when it was queued. + @pytest.mark.asyncio + async def test_rehydrated_fetch_data_rejects_untrusted_host(self): + from chat_sdk.types import Attachment + + adapter = _make_adapter(bot_token="xoxb-ssrf-token") + + # Sentinel — should NEVER be reached because validation rejects first. + evil_fetch = AsyncMock(return_value=b"should-not-run") + adapter._fetch_slack_file = evil_fetch # type: ignore[method-assign] + # Restore the real validator + wrap it so we can assert on behavior. + real_fetch = SlackAdapter._fetch_slack_file + + async def guarded_fetch(url: str, token: str) -> bytes: + return await real_fetch(adapter, url, token) + + adapter._fetch_slack_file = guarded_fetch # type: ignore[method-assign] + + rehydrated = adapter.rehydrate_attachment( + Attachment( + type="image", + url="https://attacker.example.com/steal", + fetch_metadata={"url": "https://attacker.example.com/steal"}, + ) + ) + assert rehydrated.fetch_data is not None + with pytest.raises(ValidationError): + await rehydrated.fetch_data() + + def test_is_trusted_slack_download_url_allowlist(self): + # Accepts Slack-owned HTTPS hosts + assert SlackAdapter._is_trusted_slack_download_url("https://files.slack.com/f.png") + assert SlackAdapter._is_trusted_slack_download_url("https://foo.slack-edge.com/x.png") + assert SlackAdapter._is_trusted_slack_download_url("https://edge.slack.com/x") + # Rejects non-HTTPS even on a trusted host + assert not SlackAdapter._is_trusted_slack_download_url("http://files.slack.com/x") + # Rejects arbitrary hosts + assert not SlackAdapter._is_trusted_slack_download_url("https://attacker.example/x") + # Rejects look-alike hosts that merely contain "slack.com" + assert not SlackAdapter._is_trusted_slack_download_url("https://slack.com.attacker.tld/x") + # --------------------------------------------------------------------------- # Edge cases diff --git a/tests/test_teams_adapter.py b/tests/test_teams_adapter.py index ddb410d..2479d65 100644 --- a/tests/test_teams_adapter.py +++ b/tests/test_teams_adapter.py @@ -37,6 +37,33 @@ def _make_logger(): ) +class _MockAiohttpSession: + """Stub for ``aiohttp.ClientSession`` that supports the + ``async with session.get(url) as resp`` pattern. + + ``session.get(url)`` returns a synchronous async-context-manager (not + a coroutine), so we can't use ``AsyncMock`` for it — the real + aiohttp API is not itself async. Implementing it as a real method + also keeps us out of ``audit_test_quality.py``'s "MagicMock used for + async method `.get`" false-positive (which pattern-matches on + ``KeyValueState.get``, not ``ClientSession.get``). + """ + + def __init__(self, payload: bytes = b"", status: int = 200): + response = MagicMock() + response.status = status + response.read = AsyncMock(return_value=payload) + cm = MagicMock() + cm.__aenter__ = AsyncMock(return_value=response) + cm.__aexit__ = AsyncMock(return_value=False) + self._cm = cm + self.get_calls: list[str] = [] + + def get(self, url: str): + self.get_calls.append(url) + return self._cm + + # --------------------------------------------------------------------------- # Factory function # --------------------------------------------------------------------------- @@ -352,27 +379,49 @@ def test_attachment_stores_url_in_fetch_metadata(self): class TestRehydrateAttachment: - def test_rehydrates_fetch_data_from_fetch_metadata_url(self): - """After JSON roundtrip (fetch_data stripped), the URL in fetch_metadata restores the closure.""" + @pytest.mark.asyncio + async def test_rehydrates_fetch_data_from_fetch_metadata_url(self): + """After JSON roundtrip (fetch_data stripped), the URL in fetch_metadata restores the closure. + + Awaits the rebuilt closure against a stubbed HTTP session to prove + the wire-up is correct (not just "some callable was returned"). + """ from chat_sdk.types import Attachment + trusted_url = "https://graph.microsoft.com/photo.jpg" adapter = _make_adapter(app_id="test-app") + + session = _MockAiohttpSession(payload=b"teams-bytes") + adapter._get_http_session = AsyncMock(return_value=session) # type: ignore[method-assign] + attachment = Attachment( type="image", - url="https://x.com/photo.jpg", - fetch_metadata={"url": "https://x.com/photo.jpg"}, + url=trusted_url, + fetch_metadata={"url": trusted_url}, ) rehydrated = adapter.rehydrate_attachment(attachment) assert rehydrated.fetch_data is not None - def test_rehydrate_falls_back_to_attachment_url_when_fetch_metadata_missing(self): + bytes_result = await rehydrated.fetch_data() + assert bytes_result == b"teams-bytes" + assert session.get_calls == [trusted_url] + + @pytest.mark.asyncio + async def test_rehydrate_falls_back_to_attachment_url_when_fetch_metadata_missing(self): """When fetch_metadata is absent, rehydrate falls back to the attachment's top-level url.""" from chat_sdk.types import Attachment + trusted_url = "https://attachments.office.net/doc.pdf" adapter = _make_adapter(app_id="test-app") - attachment = Attachment(type="file", url="https://x.com/doc.pdf") + + session = _MockAiohttpSession(payload=b"fallback-bytes") + adapter._get_http_session = AsyncMock(return_value=session) # type: ignore[method-assign] + + attachment = Attachment(type="file", url=trusted_url) rehydrated = adapter.rehydrate_attachment(attachment) assert rehydrated.fetch_data is not None + assert await rehydrated.fetch_data() == b"fallback-bytes" + assert session.get_calls == [trusted_url] def test_rehydrate_returns_unchanged_when_no_url(self): """Without any URL, rehydrate returns the attachment unchanged.""" @@ -383,6 +432,40 @@ def test_rehydrate_returns_unchanged_when_no_url(self): rehydrated = adapter.rehydrate_attachment(attachment) assert rehydrated is attachment + # Python-first divergence: SSRF guard at fetch time. + @pytest.mark.asyncio + async def test_rehydrated_fetch_data_rejects_untrusted_host(self): + from chat_sdk.types import Attachment + + adapter = _make_adapter(app_id="test-app") + # If the validator is bypassed, this would be called — it must not be. + adapter._get_http_session = AsyncMock() # type: ignore[method-assign] + + attachment = Attachment( + type="image", + url="https://attacker.example.com/pwn.jpg", + fetch_metadata={"url": "https://attacker.example.com/pwn.jpg"}, + ) + rehydrated = adapter.rehydrate_attachment(attachment) + assert rehydrated.fetch_data is not None + with pytest.raises(ValidationError): + await rehydrated.fetch_data() + adapter._get_http_session.assert_not_awaited() + + def test_is_trusted_teams_download_url_allowlist(self): + # Accepts Microsoft-owned hosts + assert TeamsAdapter._is_trusted_teams_download_url("https://graph.microsoft.com/x") + assert TeamsAdapter._is_trusted_teams_download_url("https://foo.sharepoint.com/x") + assert TeamsAdapter._is_trusted_teams_download_url("https://smba.trafficmanager.net/x") + assert TeamsAdapter._is_trusted_teams_download_url("https://attachments.office.net/x") + assert TeamsAdapter._is_trusted_teams_download_url("https://x.botframework.com/y") + # Rejects non-HTTPS + assert not TeamsAdapter._is_trusted_teams_download_url("http://graph.microsoft.com/x") + # Rejects arbitrary hosts + assert not TeamsAdapter._is_trusted_teams_download_url("https://attacker.example/x") + # Rejects look-alikes + assert not TeamsAdapter._is_trusted_teams_download_url("https://graph.microsoft.com.attacker.tld/x") + # --------------------------------------------------------------------------- # normalizeMentions (via parseMessage) From 7224871dcfe63a383418629c2e4fac0385178d1f Mon Sep 17 00:00:00 2001 From: patrick-chinchill Date: Fri, 24 Apr 2026 01:40:59 -0700 Subject: [PATCH 4/4] fix(attachments): preserve Attachment.data on rehydrate + finish truthiness pass - _coerce_attachments + Message.from_json{_compat} now preserve the data: bytes | None field through rehydrate paths (was silently dropped) - Close the mime_type truthiness fallback in types.py:517,591 that the round-2 sweep missed - Docstring note on rehydrate_attachment: must be sync Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/UPSTREAM_SYNC.md | 1 + src/chat_sdk/chat.py | 6 ++ src/chat_sdk/types.py | 24 +++++++- tests/test_types.py | 129 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 158 insertions(+), 2 deletions(-) diff --git a/docs/UPSTREAM_SYNC.md b/docs/UPSTREAM_SYNC.md index 526ace6..1d60574 100644 --- a/docs/UPSTREAM_SYNC.md +++ b/docs/UPSTREAM_SYNC.md @@ -480,6 +480,7 @@ stay explicit instead of being rediscovered in code review. | `from_json()` | Accepts both camelCase and snake_case | camelCase only | | Slack installation keys | camelCase (matches TS, with snake_case fallback) | camelCase | | Redis/Postgres queue entries | Different wire format (message serialized via `to_json()`) | `JSON.stringify(entry)` directly | +| `Attachment.data` (bytes) | Not serialized by `to_json()` (bytes aren't JSON-safe). Preserved through in-memory rehydrate paths (`_coerce_attachments`, `Message.from_json{_compat}`) when raw dicts carry the field. A JSON roundtrip through Redis/Postgres state drops `data`; adapters should rely on `fetch_metadata` + `rehydrate_attachment` to reconstruct the download closure instead. | Same — `data` is not part of `SerializedAttachment` | ### Coverage confidence by module diff --git a/src/chat_sdk/chat.py b/src/chat_sdk/chat.py index e5b8a51..6911563 100644 --- a/src/chat_sdk/chat.py +++ b/src/chat_sdk/chat.py @@ -2276,6 +2276,12 @@ def _coerce_attachments(raw: Any) -> list[Attachment]: size=att.get("size"), width=att.get("width"), height=att.get("height"), + # ``data`` is not part of ``SerializedAttachment`` (bytes + # aren't JSON-safe, so ``to_json`` drops it). But + # in-memory state backends can hand us raw dicts that + # still carry the bytes; pass them through so we don't + # silently lose pre-fetched data on rehydrate. + data=att.get("data"), fetch_metadata=fetch_metadata, ) ) diff --git a/src/chat_sdk/types.py b/src/chat_sdk/types.py index e43243c..6cdfe29 100644 --- a/src/chat_sdk/types.py +++ b/src/chat_sdk/types.py @@ -515,10 +515,16 @@ def from_json(cls, data: dict[str, Any] | Message) -> Message: type=att.get("type", "file"), url=att.get("url"), name=att.get("name"), - mime_type=att.get("mimeType") or att.get("mime_type"), + mime_type=(att.get("mimeType") if att.get("mimeType") is not None else att.get("mime_type")), size=att.get("size"), width=att.get("width"), height=att.get("height"), + # ``data`` is not part of the ``SerializedAttachment`` wire + # shape (``to_json`` drops bytes — JSON can't carry them). + # We still accept it here so callers handing a raw dict + # that happens to carry pre-fetched bytes (e.g. in-memory + # state backends) don't silently lose the payload. + data=att.get("data"), fetch_metadata=( att.get("fetchMetadata") if att.get("fetchMetadata") is not None else att.get("fetch_metadata") ), @@ -589,10 +595,15 @@ def from_json_compat(cls, data: dict[str, Any]) -> Message: type=att.get("type", "file"), url=att.get("url"), name=att.get("name"), - mime_type=att.get("mime_type") or att.get("mimeType"), + mime_type=(att.get("mime_type") if att.get("mime_type") is not None else att.get("mimeType")), size=att.get("size"), width=att.get("width"), height=att.get("height"), + # ``data`` is not part of the ``SerializedAttachment`` wire + # shape (bytes aren't JSON-safe), but accepting it here + # keeps in-memory callers that pass raw dicts with + # pre-fetched bytes from silently losing the payload. + data=att.get("data"), fetch_metadata=( att.get("fetch_metadata") if att.get("fetch_metadata") is not None else att.get("fetchMetadata") ), @@ -1340,6 +1351,15 @@ def rehydrate_attachment(self, attachment: Attachment) -> Attachment: no-op (returns the attachment unchanged) — adapters that support file downloads should override it to rebuild the platform-specific download closure from ``attachment.fetch_metadata``. + + .. important:: + This hook must be **synchronous**. Async rehydration is not + supported — the call site assigns the return value directly into + ``Message.attachments``, so returning a coroutine would land a + coroutine in the list and downstream ``att.fetch_data`` access + would raise. If platform-specific rehydration needs I/O, push + the async work into the returned ``fetch_data`` closure (which + *is* awaited when consumers call it) instead of doing it here. """ return attachment diff --git a/tests/test_types.py b/tests/test_types.py index 3b81430..a456c8a 100644 --- a/tests/test_types.py +++ b/tests/test_types.py @@ -147,6 +147,135 @@ def test_defaults(self): assert att.data is None assert att.fetch_data is None + def test_data_bytes_preserved_through_coerce_attachments(self): + """``_coerce_attachments`` must not drop ``data: bytes`` from raw dicts. + + In-memory state adapters can hand us attachment dicts that still + carry pre-fetched bytes (bypassing ``to_json`` — bytes aren't + JSON-safe, so the serialization path explicitly omits them). + Round-trip the dict through ``_coerce_attachments`` and assert the + bytes survive; regression guard for the silent-data-loss bug on + the queue/debounce rehydrate paths. + """ + from chat_sdk.chat import _coerce_attachments + + raw = [ + { + "type": "file", + "url": "https://example.com/f.pdf", + "name": "f.pdf", + "mimeType": "application/pdf", + "data": b"PDF-bytes", + "fetchMetadata": {"url": "https://example.com/f.pdf"}, + } + ] + out = _coerce_attachments(raw) + assert len(out) == 1 + assert isinstance(out[0], Attachment) + assert out[0].data == b"PDF-bytes" + assert out[0].mime_type == "application/pdf" + assert out[0].fetch_metadata == {"url": "https://example.com/f.pdf"} + + def test_data_bytes_preserved_through_from_json(self): + """``Message.from_json`` must preserve raw ``data`` bytes when present. + + ``to_json`` drops ``data`` (JSON can't carry bytes), so it will + not appear on the wire — but callers that hand us a raw dict with + both the envelope and a ``data`` field should not silently lose + it. Exercises the camelCase-first (``from_json``) path. + """ + dt = datetime(2024, 6, 15, 10, 30, 0, tzinfo=timezone.utc) + raw = { + "_type": "chat:Message", + "id": "m1", + "threadId": "t1", + "text": "hi", + "formatted": {"type": "root", "children": []}, + "author": { + "userId": "U1", + "userName": "a", + "fullName": "A", + "isBot": False, + "isMe": False, + }, + "metadata": {"dateSent": dt.isoformat(), "edited": False}, + "attachments": [ + { + "type": "file", + "url": "https://example.com/f.pdf", + "mimeType": "application/pdf", + "data": b"PDF-bytes", + } + ], + } + msg = Message.from_json(raw) + assert len(msg.attachments) == 1 + assert msg.attachments[0].data == b"PDF-bytes" + assert msg.attachments[0].mime_type == "application/pdf" + + def test_data_bytes_preserved_through_from_json_compat(self): + """``Message.from_json_compat`` (snake_case-first) must preserve ``data``.""" + dt = datetime(2024, 6, 15, 10, 30, 0, tzinfo=timezone.utc) + raw = { + "id": "m1", + "thread_id": "t1", + "text": "hi", + "formatted": {"type": "root", "children": []}, + "author": { + "user_id": "U1", + "user_name": "a", + "full_name": "A", + "is_bot": False, + "is_me": False, + }, + "metadata": {"date_sent": dt.isoformat(), "edited": False}, + "attachments": [ + { + "type": "file", + "url": "https://example.com/f.pdf", + "mime_type": "application/pdf", + "data": b"PDF-bytes", + } + ], + } + msg = Message.from_json_compat(raw) + assert len(msg.attachments) == 1 + assert msg.attachments[0].data == b"PDF-bytes" + assert msg.attachments[0].mime_type == "application/pdf" + + def test_to_json_still_drops_bytes(self): + """``to_json`` must not emit ``data`` (bytes aren't JSON-safe). + + Regression guard: propagating ``data`` through the in-memory + rehydrate paths must not accidentally start serializing bytes + onto the wire. + """ + dt = datetime(2024, 6, 15, 10, 30, 0, tzinfo=timezone.utc) + msg = Message( + id="m1", + thread_id="t1", + text="hi", + formatted={"type": "root", "children": []}, + author=Author( + user_id="U1", + user_name="a", + full_name="A", + is_bot=False, + is_me=False, + ), + metadata=MessageMetadata(date_sent=dt), + attachments=[ + Attachment( + type="file", + url="https://example.com/f.pdf", + mime_type="application/pdf", + data=b"PDF-bytes", + ) + ], + ) + serialized = msg.to_json() + assert "data" not in serialized["attachments"][0] + class TestMessage: """Tests for Message dataclass."""