diff --git a/.changeset/config.json b/.changeset/config.json index c1dcce53..e4a186f5 100644 --- a/.changeset/config.json +++ b/.changeset/config.json @@ -7,5 +7,9 @@ "access": "public", "baseBranch": "main", "updateInternalDependencies": "patch", - "ignore": ["example-nextjs-chat", "@chat-adapter/integration-tests"] + "ignore": [ + "example-nextjs-chat", + "example-telegram-chat", + "@chat-adapter/integration-tests" + ] } diff --git a/.changeset/fix-telegram-markdownv2.md b/.changeset/fix-telegram-markdownv2.md new file mode 100644 index 00000000..54d2dd4c --- /dev/null +++ b/.changeset/fix-telegram-markdownv2.md @@ -0,0 +1,10 @@ +--- +"@chat-adapter/telegram": patch +"chat": minor +--- + +Switch Telegram adapter's outbound `parse_mode` from legacy `Markdown` to `MarkdownV2`, and replace the standard-markdown passthrough renderer with a proper AST → MarkdownV2 renderer. Standard markdown (`**bold**`) and legacy `Markdown` (`*bold*`) use different syntaxes and have no shared escape rules, so any message containing `.`, `!`, `(`, `)`, `-`, `_` in regular text — which is virtually every LLM-generated message — was being rejected with `can't parse entities`. The new renderer walks the mdast tree and emits MarkdownV2 with context-aware escaping (normal text vs. code blocks vs. link URLs), uniformly applies MarkdownV2 `parse_mode` to every format-converter output (including AST messages, which previously shipped without `parse_mode` and rendered asterisks literally), and escapes card fallback text. + +Also fix silent message truncation that the MarkdownV2 migration widened from a rare bug into a reliable 400. The previous truncator sliced messages at 4096/1024 chars and appended literal `...`, but in MarkdownV2 `.` is a reserved character that must be escaped, the slice can leave an orphan trailing `\`, and it can cut through a paired entity (`*bold*`, `` `code` ``) leaving it unclosed — all of which cause `can't parse entities`. 
The two truncate methods are unified into `truncateForTelegram(text, limit, parseMode)`, which appends an escaped `\.\.\.` for MarkdownV2 and walks back past unbalanced entity delimiters or orphan backslashes before appending. Plain-text messages keep literal `...`. + +Internal typing hardening: `renderMarkdownV2` is now typed exhaustively on mdast's `Nodes` union with a `never` assertion, so new mdast node types fail the build rather than silently falling through. Introduce `TelegramParseMode = "MarkdownV2" | "plain"` replacing the previous `string | undefined` at call sites, with `toBotApiParseMode` mapping to the Bot API wire format at the boundary. The `chat` package gains a re-export of mdast's `Nodes` union so adapters can build exhaustively typed renderers without importing mdast directly. diff --git a/examples/telegram-chat/README.md b/examples/telegram-chat/README.md new file mode 100644 index 00000000..c0f92936 --- /dev/null +++ b/examples/telegram-chat/README.md @@ -0,0 +1,42 @@ +# telegram-chat + +A Telegram bot that exercises the Chat SDK end-to-end: MarkdownV2 rendering, cards with inline-keyboard actions, reactions, file uploads, and streaming edits. Runs in polling mode — no webhook, no public URL, no deploy. + +Doubles as a reference example for developers learning the SDK and an interactive smoke-test harness for the `@chat-adapter/telegram` package. + +## Prerequisites + +- Node.js ≥ 20 +- A Telegram bot token from [@BotFather](https://t.me/BotFather) + +## Run + +From the repo root: + +```bash +pnpm install +TELEGRAM_BOT_TOKEN=<your-bot-token> pnpm --filter example-telegram-chat start +``` + +Optional: `TELEGRAM_BOT_USERNAME=<your-bot-username>` (defaults to `telegramchatdemobot`). + +Then DM the bot — any message opens the main menu. 
+ +## What you see + +The bot replies with an inline keyboard with three categories: + +- **Text & Markdown** — 7 curated markdown demos, a streaming edit loop, and 3 over-limit truncation demos +- **Cards & Actions** — interactive approval card, callback-data size probe, link buttons +- **Media & Reactions** — on-demand reactions, generated PNG and PDF uploads + +Every sub-menu has a `← Back` button. Sending any text at any time reopens the main menu. + +## Why it's stateless + +No long-lived thread subscription, no persistence. Every button press is self-contained (the one exception is the reactions demo, which subscribes briefly and unsubscribes itself); memory state is used only because the SDK requires a state adapter. If you need a stateful reference, see `examples/nextjs-chat`. + +## Related + +- [`packages/adapter-telegram`](../../packages/adapter-telegram) — adapter source and README +- [`examples/nextjs-chat`](../nextjs-chat) — full multi-platform example with AI integration, Redis state, and webhooks diff --git a/examples/telegram-chat/package.json b/examples/telegram-chat/package.json new file mode 100644 index 00000000..611c5103 --- /dev/null +++ b/examples/telegram-chat/package.json @@ -0,0 +1,20 @@ +{ + "name": "example-telegram-chat", + "version": "0.1.0", + "private": true, + "type": "module", + "scripts": { + "start": "tsx src/index.ts", + "typecheck": "tsc --noEmit" + }, + "dependencies": { + "@chat-adapter/state-memory": "workspace:*", + "@chat-adapter/telegram": "workspace:*", + "chat": "workspace:*" + }, + "devDependencies": { + "@types/node": "^22.10.2", + "tsx": "^4.19.2", + "typescript": "^5.7.2" + } +} diff --git a/examples/telegram-chat/src/demos/cards.tsx b/examples/telegram-chat/src/demos/cards.tsx new file mode 100644 index 00000000..4568ee2f --- /dev/null +++ b/examples/telegram-chat/src/demos/cards.tsx @@ -0,0 +1,232 @@ +/** + * Card demos — interactive approval, callback-data size probe, link button. 
+ * + * The approval card is edited in-place on button press: the + * on-action handler calls adapter.editMessage(threadId, messageId, newCard) + * to replace the original buttons with a decision status. + * + * The size-probe card deliberately includes one button whose callback_data + * is well under the limit and one that exceeds Telegram's 64-byte cap, + * teaching the constraint without hiding it. + */ + +import { + Actions, + Button, + Card, + type CardElement, + CardText, + Divider, + Field, + Fields, + LinkButton, + Section, + type Thread, + toCardElement, +} from "chat"; +import { encode } from "../lib/callbacks"; + +type AnyThread = Thread; + +export const APPROVAL_DEMO_ID = "card.approval"; +const SIZE_PROBE_DEMO_ID = "card.size"; +const LINK_DEMO_ID = "card.link"; + +const PENDING_APPROVAL_CARD = ( + +
+ **Approval needed** for order #1234. + + + + + +
+ + + + + +
+); + +const TELEGRAM_CALLBACK_DATA_LIMIT = 64; + +const SAFE_BUTTON_ID = encode({ + kind: "act", + demo: SIZE_PROBE_DEMO_ID, + arg: "ok", +}); +// What Telegram actually sees on the wire: the adapter wraps each button +// id in `chat:{"a":"..."}` before shipping, so the effective budget is +// ~13 bytes less than the raw Telegram cap. Measured in UTF-8 bytes, not UTF-16 units. +const SAFE_PAYLOAD_WIRE_BYTES = Buffer.byteLength(`chat:{"a":"${SAFE_BUTTON_ID}"}`, "utf8"); + +const OVERSIZE_ARG = "this-id-intentionally-long-to-exceed-the-64-byte-limit"; +const OVERSIZE_BUTTON_ID = encode({ + kind: "act", + demo: SIZE_PROBE_DEMO_ID, + arg: OVERSIZE_ARG, +}); +const OVERSIZE_PAYLOAD_WIRE_BYTES = Buffer.byteLength(`chat:{"a":"${OVERSIZE_BUTTON_ID}"}`, "utf8"); + +const SAFE_SIZE_CARD = ( + +
+ + Telegram caps `callback_data` at **{TELEGRAM_CALLBACK_DATA_LIMIT} + bytes**. This button's id fits the budget (including the adapter's + `chat:{"{}"}` envelope). + + + + + +
+ + + + +
+); + +const OVERSIZE_SIZE_CARD = ( + +
+ + Same encoding, longer arg, cap exceeded. This card will NOT post — the + SDK throws `ValidationError` at post time so the bug surfaces at the + line that constructed the button, not later at runtime. + + + + + +
+ + + + +
+); + +const LINK_CARD = ( + +
+ + {"`` opens a URL directly. No callback handler runs."} + +
+ + + View on GitHub + + Visit Vercel + +
+); + +export const CARD_DEMOS: { + id: string; + label: string; + run: (thread: AnyThread) => Promise; +}[] = [ + { + id: APPROVAL_DEMO_ID, + label: "Interactive approval card", + run: async (thread) => { + await thread.post(PENDING_APPROVAL_CARD); + }, + }, + { + id: SIZE_PROBE_DEMO_ID, + label: "Button-size probe (64 B)", + run: async (thread) => { + // First: show the working case with bytecounts. + await thread.post(SAFE_SIZE_CARD); + + // Then: attempt the oversize case. Expected to throw at post time — + // that's the teaching moment, not a failure. + try { + await thread.post(OVERSIZE_SIZE_CARD); + await thread.post( + "⚠️ Unexpected: oversize card posted without error. Did the adapter limit change?" + ); + } catch (err) { + const msg = (err as Error).message ?? String(err); + await thread.post({ + markdown: [ + "📎 **Expected ValidationError caught.**", + "", + `> ${msg}`, + "", + "The SDK refuses to ship malformed `callback_data` to Telegram. Alternatives the SDK could have chosen:", + "", + "- Silently truncate → button clicks would echo a truncated id that doesn't match any handler; silent runtime bug.", + "- Hash + server-side lookup → needs stateful bookkeeping that survives bot restarts; higher ops cost.", + "- **Throw at post time** → developer sees the failure at the line that caused it. (Chosen here.)", + "", + "Lesson: treat `callback_data` as a short routing key, never as app state. Store data elsewhere, keyed by a short id.", + ].join("\n"), + }); + } + }, + }, + { + id: LINK_DEMO_ID, + label: "LinkButton card", + run: async (thread) => { + await thread.post(LINK_CARD); + }, + }, +]; + +export function buildDecidedCard( + decision: "approve" | "reject", + user: string, + when: Date +): CardElement { + const label = decision === "approve" ? "✅ Approved" : "🚫 Rejected"; + const time = `${when.getHours().toString().padStart(2, "0")}:${when + .getMinutes() + .toString() + .padStart(2, "0")}`; + const jsx = ( + +
+ + {label} by @{user} at {time}. + + + + + + +
+
+ ); + const card = toCardElement(jsx); + if (!card) { + throw new Error("buildDecidedCard: toCardElement returned null"); + } + return card; +} diff --git a/examples/telegram-chat/src/demos/markdown.ts b/examples/telegram-chat/src/demos/markdown.ts new file mode 100644 index 00000000..c89930b9 --- /dev/null +++ b/examples/telegram-chat/src/demos/markdown.ts @@ -0,0 +1,194 @@ +/** + * MarkdownV2 rendering demos. + * + * Each demo posts a single message exercising one aspect of the renderer: + * plain text, inline emphasis, code blocks, links, lists/tables, the full + * 19-character escape matrix (18 reserved characters plus backslash), a + * realistic LLM response, a streaming edit loop, and over-limit truncation. + * If any demo fails with "can't parse entities", the renderer has a bug. + */ + +import type { Thread } from "chat"; + +const LLM_CORPUS = [ + "# Trip Summary: Morocco", + "", + "Here's your **personalized** 7-day itinerary. Price: $2,450/person (all-inclusive)!", + "", + "## Day 1 — Arrival", + "", + "- Airport pickup at 14:30", + "- Check-in at *Riad El Fenn* (4-star)", + "- Dinner: [La Mamounia](https://www.mamounia.com/restaurants)", + "", + "> Tip: bring cash — souks don't always take cards.", + "", + "```bash", + "curl 'https://api.rates.io/MAD' | jq '.rate'", + "```", + "", + "| Day | Activity | Cost |", + "|-----|----------|------|", + "| 1 | Arrival | $200 |", + "| 2 | Atlas | $350 |", + "", + "~~Previous: $2,800~~. New total: **$2,450**.", +].join("\n"); + +const STREAMING_CHUNKS = [ + "# Streaming demo", + "\n\nWatch this message update in real time. 
Each chunk appends content and triggers an editMessage call.", + "\n\n**Progress:** `[█░░░░]`", + "\n\n**Progress:** `[███░░]`", + "\n\n**Progress:** `[█████]` — done!", + "\n\n- Rendering works per chunk", + "\n- Special chars escape correctly", + "\n- Final message has no raw asterisks", +]; + +const STREAM_CHUNK_DELAY_MS = 600; + +// Truncation demos: each produces a rendered message over Telegram's 4096-char +// limit, exercising a different code path in trimToMarkdownV2SafeBoundary. +// If any of these renders with the asterisks literal, an orphan `\`, or +// Telegram returns `can't parse entities`, the truncator has regressed. +const LONG_PLAIN_LENGTH = 5000; +const LONG_BODY_BEFORE_ENTITY = 4000; +const LONG_BODY_INSIDE_ENTITY = 1000; + +function longPlainMarkdown(): string { + // No special chars — rendered output is the same length as input. Truncation + // must append escaped `\.\.\.` and hold under the 4096 char limit. + return "a".repeat(LONG_PLAIN_LENGTH); +} + +function longWithUnclosedBold(): string { + // `**bold**` opens before the limit and closes after it. Naive truncation + // keeps the opening `*` without its closer → unclosed bold entity → 400. + return `${"a".repeat(LONG_BODY_BEFORE_ENTITY)}**${"b".repeat(LONG_BODY_INSIDE_ENTITY)}**`; +} + +function longWithUnclosedCode(): string { + // Same shape, inline code. Unclosed backtick entity → 400. + return `${"a".repeat(LONG_BODY_BEFORE_ENTITY)}\`${"b".repeat(LONG_BODY_INSIDE_ENTITY)}\``; +} + +type AnyThread = Thread; + +export const MARKDOWN_DEMOS: { + id: string; + label: string; + run: (thread: AnyThread) => Promise; +}[] = [ + { + id: "md.plain", + label: "Plain text", + run: async (thread) => { + await thread.post("Hello, this is plain text. 
No formatting."); + }, + }, + { + id: "md.emphasis", + label: "Inline emphasis", + run: async (thread) => { + await thread.post({ + markdown: "**bold** and *italic* and ~~strike~~ and `inline code`", + }); + }, + }, + { + id: "md.code", + label: "Code block", + run: async (thread) => { + await thread.post({ + markdown: [ + "```bash", + "# pipes, dots, bangs, parens must render literally", + "curl 'https://api.example.com/v1/rates' | jq '.rate' > out.txt", + "```", + ].join("\n"), + }); + }, + }, + { + id: "md.links", + label: "Links", + run: async (thread) => { + await thread.post({ + markdown: + "Visit [Vercel](https://vercel.com) and also [this (weird!) label](https://example.com/path?x=1)", + }); + }, + }, + { + id: "md.list-table", + label: "List + table", + run: async (thread) => { + await thread.post({ + markdown: [ + "- **first** item", + "- second *item*", + "- third `item`", + "", + "| Name | Age | City |", + "|------|-----|------|", + "| Alice | 30 | Lisbon |", + "| Bob | 25 | Porto |", + ].join("\n"), + }); + }, + }, + { + id: "md.torture", + label: "Torture string", + run: async (thread) => { + await thread.post({ + markdown: + "Escape matrix: _ * [ ] ( ) ~ ` > # + - = | { } . ! 
\\ all at once", + }); + }, + }, + { + id: "md.llm", + label: "LLM-style response", + run: async (thread) => { + await thread.post({ markdown: LLM_CORPUS }); + }, + }, + { + id: "md.streaming", + label: "Streaming demo", + run: async (thread) => { + async function* iter(): AsyncIterable { + for (const chunk of STREAMING_CHUNKS) { + yield chunk; + await new Promise((resolve) => + setTimeout(resolve, STREAM_CHUNK_DELAY_MS) + ); + } + } + await thread.post(iter()); + }, + }, + { + id: "md.long-plain", + label: "Long (5000 plain)", + run: async (thread) => { + await thread.post({ markdown: longPlainMarkdown() }); + }, + }, + { + id: "md.long-bold", + label: "Long (bold crosses 4096)", + run: async (thread) => { + await thread.post({ markdown: longWithUnclosedBold() }); + }, + }, + { + id: "md.long-code", + label: "Long (code crosses 4096)", + run: async (thread) => { + await thread.post({ markdown: longWithUnclosedCode() }); + }, + }, +]; diff --git a/examples/telegram-chat/src/demos/media.ts b/examples/telegram-chat/src/demos/media.ts new file mode 100644 index 00000000..014049cf --- /dev/null +++ b/examples/telegram-chat/src/demos/media.ts @@ -0,0 +1,132 @@ +/** + * Media & Reactions demos. + * + * The reactions demo breaks the otherwise-stateless bot: it briefly + * subscribes the thread to a one-shot "react to the next message" handler + * that unsubscribes after firing (or after a timeout). + * + * File upload demos generate a 1×1 PNG and a minimal PDF in memory and + * post them as attachments. Telegram's adapter treats them as documents + * (single-file-per-message constraint). + */ + +import type { SubscribedMessageHandler, Thread } from "chat"; +import { generateMinimalPdf } from "../lib/pdf"; +import { generate1x1Png } from "../lib/png"; + +type AnyThread = Thread; + +/** + * Subset of the Chat surface media demos use. Takes the + * `onSubscribedMessage` registration function rather than the whole Chat + * instance to avoid type-parameter leaks. 
+ */ +export interface MediaDemoChat { + onSubscribedMessage(handler: SubscribedMessageHandler): void; +} + +const REACTION_WINDOW_MS = 30_000; +const REACTION_EMOJIS = ["❤", "🔥", "👍"]; + +export const MEDIA_DEMOS: { + id: string; + label: string; + run: (thread: AnyThread, chat: MediaDemoChat) => Promise; +}[] = [ + { + id: "media.reactions", + label: "Reactions demo", + run: async (thread, chat) => { + await thread.post( + `🧪 Send me any message in the next 30 seconds and I'll react to it.` + ); + await armReactionOneShot(thread, chat); + }, + }, + { + id: "media.upload-png", + label: "Upload PNG", + run: async (thread) => { + await thread.post({ + markdown: "📎 Sending a tiny generated PNG…", + files: [ + { + filename: "demo.png", + data: generate1x1Png(), + mimeType: "image/png", + }, + ], + }); + }, + }, + { + id: "media.upload-pdf", + label: "Upload PDF", + run: async (thread) => { + await thread.post({ + markdown: "📎 Sending a generated single-page PDF…", + files: [ + { + filename: "demo.pdf", + data: generateMinimalPdf("Hello from telegram-chat!"), + mimeType: "application/pdf", + }, + ], + }); + }, + }, +]; + +/** + * Subscribes the thread briefly and registers a one-shot message handler + * that reacts to the next incoming message. The handler unsubscribes + * after firing or after REACTION_WINDOW_MS, whichever comes first. + */ +async function armReactionOneShot( + thread: AnyThread, + chat: MediaDemoChat +): Promise { + await thread.subscribe(); + + let fired = false; + + const timeout = setTimeout(async () => { + if (fired) { + return; + } + fired = true; + try { + await thread.unsubscribe(); + await thread.post("⌛ Reaction window closed. 
Try again from the menu."); + } catch (err) { + console.error("[reactions] failed to unsubscribe on timeout", err); + } + }, REACTION_WINDOW_MS); + + chat.onSubscribedMessage(async (subscribedThread, message) => { + if (fired || subscribedThread.id !== thread.id) { + return; + } + fired = true; + clearTimeout(timeout); + try { + for (const emoji of REACTION_EMOJIS) { + if (subscribedThread.adapter.addReaction) { + await subscribedThread.adapter.addReaction( + subscribedThread.id, + message.id, + emoji + ); + } + } + await subscribedThread.post("✅ Reactions sent."); + } catch (err) { + console.error("[reactions] add failed", err); + await subscribedThread.post( + `❌ Reaction — ${(err as Error).message ?? String(err)}` + ); + } finally { + await subscribedThread.unsubscribe(); + } + }); +} diff --git a/examples/telegram-chat/src/index.ts b/examples/telegram-chat/src/index.ts new file mode 100644 index 00000000..413e79fb --- /dev/null +++ b/examples/telegram-chat/src/index.ts @@ -0,0 +1,182 @@ +/** + * telegram-chat — reference bot for the Chat SDK's Telegram adapter. + * + * Boots a polling-mode Telegram adapter, wires mention and action handlers, + * routes both to the menu state machine in menu.tsx. Stateless beyond the + * reactions demo, which briefly subscribes and self-unsubscribes. + */ + +import { createMemoryState } from "@chat-adapter/state-memory"; +import { createTelegramAdapter } from "@chat-adapter/telegram"; +import { type ActionEvent, Chat, type Logger, type Thread } from "chat"; +import { APPROVAL_DEMO_ID, buildDecidedCard, CARD_DEMOS } from "./demos/cards"; +import { MARKDOWN_DEMOS } from "./demos/markdown"; +import { MEDIA_DEMOS, type MediaDemoChat } from "./demos/media"; +import { decode } from "./lib/callbacks"; +import { postMainMenu, postMenu } from "./menu"; + +const TELEGRAM_BOT_TOKEN = process.env.TELEGRAM_BOT_TOKEN; +if (!TELEGRAM_BOT_TOKEN) { + console.error( + "TELEGRAM_BOT_TOKEN is not set. Create a bot with @BotFather and export it." 
+ ); + process.exit(1); +} + +const BOT_USERNAME = process.env.TELEGRAM_BOT_USERNAME ?? "telegramchatdemobot"; + +const logger: Logger = { + debug: (msg, meta) => console.debug(`[debug] ${msg}`, meta ?? ""), + info: (msg, meta) => console.log(`[info] ${msg}`, meta ?? ""), + warn: (msg, meta) => console.warn(`[warn] ${msg}`, meta ?? ""), + error: (msg, meta) => console.error(`[error] ${msg}`, meta ?? ""), + child: () => logger, +}; + +const state = createMemoryState(); +const telegram = createTelegramAdapter({ + botToken: TELEGRAM_BOT_TOKEN, + mode: "polling", + userName: BOT_USERNAME, + logger, +}); + +const chat = new Chat({ + userName: BOT_USERNAME, + adapters: { telegram }, + state, + logger, +}); + +type DemoRunner = (thread: Thread) => Promise; + +const DEMO_LOOKUP = new Map(); + +for (const demo of MARKDOWN_DEMOS) { + DEMO_LOOKUP.set(demo.id, { label: demo.label, run: demo.run }); +} +for (const demo of CARD_DEMOS) { + DEMO_LOOKUP.set(demo.id, { label: demo.label, run: demo.run }); +} + +const mediaDemoChat: MediaDemoChat = { + onSubscribedMessage: (handler) => chat.onSubscribedMessage(handler), +}; +for (const demo of MEDIA_DEMOS) { + DEMO_LOOKUP.set(demo.id, { + label: demo.label, + run: (thread) => demo.run(thread, mediaDemoChat), + }); +} + +// Any DM text opens the main menu. Telegram DMs route every message as a +// mention because the bot is the only other participant in the chat. +chat.onNewMention(async (thread, message) => { + console.log(`[bot] incoming text: ${message.text}`); + try { + await postMainMenu(thread); + } catch (err) { + console.error("[bot] failed to post main menu", err); + } +}); + +// The reactions demo registers its own short-lived onSubscribedMessage +// handler and unsubscribes as soon as it fires or times out. No global +// subscribed-message handler is needed here. + +// All button callbacks route through here. 
+chat.onAction(async (event) => { + const raw = event.actionId; + const parsed = decode(raw); + if (!parsed) { + console.warn(`[bot] unknown callback_data: ${raw}`); + return; + } + + const thread = event.thread; + if (!thread) { + console.warn(`[bot] action ${raw} received with no thread`); + return; + } + + if (parsed.kind === "nav") { + await postMenu(thread, parsed.menu); + return; + } + + if (parsed.kind === "run") { + const demo = DEMO_LOOKUP.get(parsed.demo); + if (!demo) { + await thread.post(`❌ Unknown demo: ${parsed.demo}`); + return; + } + try { + await demo.run(thread); + } catch (err) { + console.error(`[bot] demo ${parsed.demo} failed`, err); + await thread.post( + `❌ ${demo.label} — ${(err as Error).message ?? String(err)}` + ); + } + return; + } + + if (parsed.kind === "act") { + await handleAction(parsed.demo, parsed.arg, event); + return; + } +}); + +async function handleAction( + demo: string, + arg: string, + event: ActionEvent +): Promise { + const thread = event.thread; + if (!thread) { + return; + } + + if (demo === APPROVAL_DEMO_ID && (arg === "approve" || arg === "reject")) { + try { + const card = buildDecidedCard( + arg, + event.user.userName || event.user.fullName, + new Date() + ); + await event.adapter.editMessage(event.threadId, event.messageId, { + card, + }); + } catch (err) { + console.error("[bot] approval edit failed", err); + await thread.post( + `❌ Approval update — ${(err as Error).message ?? String(err)}` + ); + } + return; + } + + // Size-probe: Telegram rejects the oversize button at post time, so a + // click on the acceptable one just reports success. + if (demo === "card.size") { + await thread.post( + arg === "ok" + ? "✅ Small payload delivered successfully." + : `ℹ️ Unexpected action: ${arg}` + ); + return; + } + + console.warn(`[bot] unhandled action ${demo}:${arg}`); +} + +console.log("[boot] initializing chat…"); +await chat.initialize(); +console.log( + `[boot] polling for messages. 
DM @${BOT_USERNAME} any text to open the menu.` +); + +process.on("SIGINT", () => { + console.log("\n[boot] shutting down…"); + process.exit(0); +}); diff --git a/examples/telegram-chat/src/lib/callbacks.ts b/examples/telegram-chat/src/lib/callbacks.ts new file mode 100644 index 00000000..d18337cc --- /dev/null +++ b/examples/telegram-chat/src/lib/callbacks.ts @@ -0,0 +1,38 @@ +/** + * Compact callback_data encoding for menu navigation. + * + * Telegram caps callback_data at 64 bytes (per inline_keyboard button). + * Short keys ("nav", "run", "act") and short demo IDs keep every payload + * well under the limit and far from the adapter's 64-byte ValidationError. + */ + +export type MenuCallback = + | { kind: "nav"; menu: string } + | { kind: "run"; demo: string } + | { kind: "act"; demo: string; arg: string }; + +export function encode(cb: MenuCallback): string { + if (cb.kind === "nav") { + return `nav:${cb.menu}`; + } + if (cb.kind === "run") { + return `run:${cb.demo}`; + } + return `act:${cb.demo}:${cb.arg}`; +} + +export function decode(raw: string): MenuCallback | null { + const parts = raw.split(":"); + const [kind, ...rest] = parts; + + if (kind === "nav" && rest.length === 1 && rest[0]) { + return { kind: "nav", menu: rest[0] }; + } + if (kind === "run" && rest.length === 1 && rest[0]) { + return { kind: "run", demo: rest[0] }; + } + if (kind === "act" && rest.length === 2 && rest[0] && rest[1]) { + return { kind: "act", demo: rest[0], arg: rest[1] }; + } + return null; +} diff --git a/examples/telegram-chat/src/lib/pdf.ts b/examples/telegram-chat/src/lib/pdf.ts new file mode 100644 index 00000000..d1e015d7 --- /dev/null +++ b/examples/telegram-chat/src/lib/pdf.ts @@ -0,0 +1,44 @@ +/** + * Generate a minimal valid PDF in memory. No binary-processing dependency. + * + * Builds a one-page PDF with a single line of text, using hand-written + * PDF 1.4 object syntax. 
Cross-reference table offsets are computed from + * object string lengths so the output is byte-for-byte valid. + */ + +export function generateMinimalPdf(text: string): Buffer { + const objects = [ + "1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n", + "2 0 obj\n<< /Type /Pages /Kids [3 0 R] /Count 1 >>\nendobj\n", + "3 0 obj\n<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] " + + "/Contents 4 0 R /Resources << /Font << /F1 5 0 R >> >> >>\nendobj\n", + `4 0 obj\n<< /Length ${ + `BT /F1 18 Tf 72 720 Td (${escapePdfText(text)}) Tj ET`.length + } >>\nstream\nBT /F1 18 Tf 72 720 Td (${escapePdfText(text)}) Tj ET\nendstream\nendobj\n`, + "5 0 obj\n<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>\nendobj\n", + ]; + + const header = "%PDF-1.4\n"; + const offsets: number[] = []; + let body = header; + for (const obj of objects) { + offsets.push(body.length); + body += obj; + } + + const xrefOffset = body.length; + let xref = `xref\n0 ${objects.length + 1}\n0000000000 65535 f \n`; + for (const offset of offsets) { + xref += `${offset.toString().padStart(10, "0")} 00000 n \n`; + } + + const trailer = + `trailer\n<< /Size ${objects.length + 1} /Root 1 0 R >>\n` + + `startxref\n${xrefOffset}\n%%EOF`; + + return Buffer.from(body + xref + trailer, "binary"); +} + +function escapePdfText(text: string): string { + return text.replace(/[\\()]/g, "\\$&"); +} diff --git a/examples/telegram-chat/src/lib/png.ts b/examples/telegram-chat/src/lib/png.ts new file mode 100644 index 00000000..64bf4d55 --- /dev/null +++ b/examples/telegram-chat/src/lib/png.ts @@ -0,0 +1,81 @@ +/** + * Generate a minimal valid PNG in memory. No binary-processing dependency. + * + * The bytes below encode a 1×1 PNG: signature + IHDR (width 1, height 1, + * 8-bit greyscale) + IDAT (zlib-wrapped single black pixel) + IEND. + * Telegram accepts this as a photo/document upload. 
+ */ + +const PNG_1X1_BLACK: readonly number[] = [ + 0x89, + 0x50, + 0x4e, + 0x47, + 0x0d, + 0x0a, + 0x1a, + 0x0a, // PNG signature + 0x00, + 0x00, + 0x00, + 0x0d, + 0x49, + 0x48, + 0x44, + 0x52, // IHDR length + type + 0x00, + 0x00, + 0x00, + 0x01, + 0x00, + 0x00, + 0x00, + 0x01, // width=1, height=1 + 0x08, + 0x00, + 0x00, + 0x00, + 0x00, + 0x3a, + 0x7e, + 0x9b, // 8-bit grey, crc + 0x55, + 0x00, + 0x00, + 0x00, + 0x0a, + 0x49, + 0x44, + 0x41, // IDAT length + type + 0x54, + 0x78, + 0x9c, + 0x63, + 0x60, + 0x00, + 0x00, + 0x00, // deflate stream + 0x02, + 0x00, + 0x01, + 0x48, + 0xaf, + 0xa4, + 0x71, + 0x00, // IDAT crc + 0x00, + 0x00, + 0x00, + 0x49, + 0x45, + 0x4e, + 0x44, + 0xae, // IEND + 0x42, + 0x60, + 0x82, +]; + +export function generate1x1Png(): Buffer { + return Buffer.from(PNG_1X1_BLACK); +} diff --git a/examples/telegram-chat/src/menu.tsx b/examples/telegram-chat/src/menu.tsx new file mode 100644 index 00000000..607796d6 --- /dev/null +++ b/examples/telegram-chat/src/menu.tsx @@ -0,0 +1,120 @@ +/** + * Menu tree: three top-level categories, each with its own sub-menu. + * + * Menus are inline-keyboard cards. Telegram lays out buttons inside one + * <Actions> block on a single row, which gets unreadable with more than + * 3–4 items. Each menu button lives in its own <Actions> so every button + * becomes a standalone row (vertical stack). + */ + +import { + Actions, + Button, + Card, + CardText, + type ChatElement, + type Thread, +} from "chat"; +import { CARD_DEMOS } from "./demos/cards"; +import { MARKDOWN_DEMOS } from "./demos/markdown"; +import { MEDIA_DEMOS } from "./demos/media"; +import { encode } from "./lib/callbacks"; + +type AnyThread = Thread; + +const MAIN_MENU_ID = "main"; +const TEXT_MENU_ID = "text"; +const CARDS_MENU_ID = "cards"; +const MEDIA_MENU_ID = "media"; + +interface MenuItem { + id: string; + label: string; +} + +/** + * Wrap a list of {id,label} items as one <Actions> block per row, plus + * a "← Back" row at the end. 
+ */ +function renderRows(items: MenuItem[], parent: string): ChatElement[] { + const rows = items.map((item) => ( + + + + )); + rows.push( + + + + ); + return rows; +} + +export async function postMainMenu(thread: AnyThread): Promise { + await thread.post( + + Pick a category to explore the Chat SDK on Telegram. + + + + + + + + + + + ); +} + +async function postTextMenu(thread: AnyThread): Promise { + await thread.post( + + MarkdownV2 rendering demos. + {renderRows(MARKDOWN_DEMOS, MAIN_MENU_ID)} + + ); +} + +async function postCardsMenu(thread: AnyThread): Promise { + await thread.post( + + Structured cards with inline keyboards. + {renderRows(CARD_DEMOS, MAIN_MENU_ID)} + + ); +} + +async function postMediaMenu(thread: AnyThread): Promise { + await thread.post( + + Attachments and emoji reactions. + {renderRows(MEDIA_DEMOS, MAIN_MENU_ID)} + + ); +} + +export async function postMenu( + thread: AnyThread, + menuId: string +): Promise { + if (menuId === TEXT_MENU_ID) { + await postTextMenu(thread); + return; + } + if (menuId === CARDS_MENU_ID) { + await postCardsMenu(thread); + return; + } + if (menuId === MEDIA_MENU_ID) { + await postMediaMenu(thread); + return; + } + await postMainMenu(thread); +} diff --git a/examples/telegram-chat/tsconfig.json b/examples/telegram-chat/tsconfig.json new file mode 100644 index 00000000..5b7217e5 --- /dev/null +++ b/examples/telegram-chat/tsconfig.json @@ -0,0 +1,10 @@ +{ + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "noEmit": true, + "lib": ["ES2022"], + "jsx": "react-jsx", + "jsxImportSource": "chat" + }, + "include": ["src/**/*"] +} diff --git a/packages/adapter-telegram/README.md b/packages/adapter-telegram/README.md index 3a5ca8a5..207db959 100644 --- a/packages/adapter-telegram/README.md +++ b/packages/adapter-telegram/README.md @@ -149,7 +149,7 @@ TELEGRAM_API_BASE_URL=https://api.telegram.org | Feature | Supported | |---------|-----------| -| Card format | Markdown + inline keyboard buttons | +| Card 
format | MarkdownV2 + inline keyboard buttons | | Buttons | Inline keyboard callbacks | | Link buttons | Inline keyboard URLs | | Select menus | No | diff --git a/packages/adapter-telegram/src/index.test.ts b/packages/adapter-telegram/src/index.test.ts index dfb8b28f..f7770323 100644 --- a/packages/adapter-telegram/src/index.test.ts +++ b/packages/adapter-telegram/src/index.test.ts @@ -15,6 +15,11 @@ import { type TelegramMessage, type TelegramReactionType, } from "./index"; +import { + TELEGRAM_CAPTION_LIMIT, + TELEGRAM_MESSAGE_LIMIT, + TelegramFormatConverter, +} from "./markdown"; const mockLogger: Logger = { debug: vi.fn(), @@ -996,7 +1001,103 @@ describe("TelegramAdapter", () => { String((mockFetch.mock.calls[1]?.[1] as RequestInit).body) ) as { parse_mode?: string }; - expect(sendMessageBody.parse_mode).toBe("Markdown"); + expect(sendMessageBody.parse_mode).toBe("MarkdownV2"); + }); + + it("sets parse_mode for AST messages", async () => { + mockFetch + .mockResolvedValueOnce( + telegramOk({ + id: 999, + is_bot: true, + first_name: "Bot", + username: "mybot", + }) + ) + .mockResolvedValueOnce(telegramOk(sampleMessage())); + + const adapter = createTelegramAdapter({ + botToken: "token", + mode: "webhook", + logger: mockLogger, + userName: "mybot", + }); + + await adapter.initialize(createMockChat()); + + const ast = new TelegramFormatConverter().toAst("**hello** world!"); + await adapter.postMessage("telegram:123", { ast }); + + const sendMessageBody = JSON.parse( + String((mockFetch.mock.calls[1]?.[1] as RequestInit).body) + ) as { parse_mode?: string; text: string }; + + // AST messages were shipping without parse_mode, so Telegram rendered + // MarkdownV2 asterisks literally. Guard against regression. 
+ expect(sendMessageBody.parse_mode).toBe("MarkdownV2"); + expect(sendMessageBody.text).toContain("*hello*"); + expect(sendMessageBody.text).toContain("world\\!"); + }); + + it("omits parse_mode for plain string messages", async () => { + mockFetch + .mockResolvedValueOnce( + telegramOk({ + id: 999, + is_bot: true, + first_name: "Bot", + username: "mybot", + }) + ) + .mockResolvedValueOnce(telegramOk(sampleMessage())); + + const adapter = createTelegramAdapter({ + botToken: "token", + mode: "webhook", + logger: mockLogger, + userName: "mybot", + }); + + await adapter.initialize(createMockChat()); + + await adapter.postMessage("telegram:123", "plain text message"); + + const sendMessageBody = JSON.parse( + String((mockFetch.mock.calls[1]?.[1] as RequestInit).body) + ) as { parse_mode?: string }; + + expect(sendMessageBody.parse_mode).toBeUndefined(); + }); + + it("omits parse_mode for raw messages", async () => { + mockFetch + .mockResolvedValueOnce( + telegramOk({ + id: 999, + is_bot: true, + first_name: "Bot", + username: "mybot", + }) + ) + .mockResolvedValueOnce(telegramOk(sampleMessage())); + + const adapter = createTelegramAdapter({ + botToken: "token", + mode: "webhook", + logger: mockLogger, + userName: "mybot", + }); + + await adapter.initialize(createMockChat()); + + await adapter.postMessage("telegram:123", { raw: "raw.unparsed!(text)" }); + + const sendMessageBody = JSON.parse( + String((mockFetch.mock.calls[1]?.[1] as RequestInit).body) + ) as { parse_mode?: string; text: string }; + + expect(sendMessageBody.parse_mode).toBeUndefined(); + expect(sendMessageBody.text).toBe("raw.unparsed!(text)"); }); it("posts cards with inline keyboard buttons", async () => { @@ -1055,7 +1156,7 @@ describe("TelegramAdapter", () => { const row = sendMessageBody.reply_markup?.inline_keyboard[0]; expect(row).toBeDefined(); - expect(sendMessageBody.parse_mode).toBe("Markdown"); + expect(sendMessageBody.parse_mode).toBe("MarkdownV2"); expect(row?.[0]).toEqual({ text: 
"Approve", callback_data: encodeTelegramCallbackData("approve", "request-123"), @@ -1066,6 +1167,53 @@ describe("TelegramAdapter", () => { }); }); + it("renders card title as MarkdownV2 bold", async () => { + mockFetch + .mockResolvedValueOnce( + telegramOk({ + id: 999, + is_bot: true, + first_name: "Bot", + username: "mybot", + }) + ) + .mockResolvedValueOnce(telegramOk(sampleMessage())); + + const adapter = createTelegramAdapter({ + botToken: "token", + mode: "webhook", + logger: mockLogger, + userName: "mybot", + }); + + await adapter.initialize(createMockChat()); + + await adapter.postMessage("telegram:123", { + type: "card", + title: "Order #1234", + children: [ + { + type: "section", + children: [{ type: "text", content: "Approval needed." }], + }, + ], + }); + + const sendMessageBody = JSON.parse( + String((mockFetch.mock.calls[1]?.[1] as RequestInit).body) + ) as { parse_mode?: string; text: string }; + + // cardToFallbackText (from @chat-adapter/shared) defaults boldFormat + // to "*" (single asterisk, Slack mrkdwn). For Telegram the adapter + // passes `boldFormat: "**"` so the standard-markdown bold survives + // the `fromMarkdown` → AST → MarkdownV2 pipeline as real bold + // (`*Title*`), not italic (`_Title_`) or literal asterisks. + // Inner special chars (here `#`) are escaped per MarkdownV2 rules. 
+ expect(sendMessageBody.parse_mode).toBe("MarkdownV2"); + expect(sendMessageBody.text).toContain("*Order \\#1234*"); + expect(sendMessageBody.text).not.toContain("\\*"); + }); + it("adds and removes reactions", async () => { mockFetch .mockResolvedValueOnce( @@ -1860,6 +2008,232 @@ describe("TelegramAdapter", () => { }); }); +describe("message length limits", () => { + function getMeOk(): Response { + return telegramOk({ + id: 999, + is_bot: true, + first_name: "Bot", + username: "mybot", + }); + } + + async function createInitializedAdapter(): Promise { + mockFetch.mockResolvedValueOnce(getMeOk()); + const adapter = createTelegramAdapter({ + botToken: "token", + mode: "webhook", + logger: mockLogger, + userName: "mybot", + }); + await adapter.initialize(createMockChat()); + return adapter; + } + + function readSentBody(callIndex: number): { + text?: string; + parse_mode?: string; + } { + return JSON.parse( + String((mockFetch.mock.calls[callIndex]?.[1] as RequestInit).body) + ) as { text?: string; parse_mode?: string }; + } + + /** + * Count unescaped occurrences of a single-char entity delimiter. + * Preceded by `\` means escaped; we ignore those. Double `\\` means a + * literal backslash, so the following delimiter is unescaped. 
+ */ + function countUnescaped(text: string, marker: string): number { + let count = 0; + for (let i = 0; i < text.length; i++) { + if (text[i] !== marker) { + continue; + } + let backslashes = 0; + let j = i - 1; + while (j >= 0 && text[j] === "\\") { + backslashes++; + j--; + } + // Even number of preceding backslashes → marker is unescaped + if (backslashes % 2 === 0) { + count++; + } + } + return count; + } + + function endsWithOrphanBackslash(text: string): boolean { + let trailing = 0; + for (let i = text.length - 1; i >= 0 && text[i] === "\\"; i--) { + trailing++; + } + // Odd trailing backslashes = last `\` has nothing to escape + return trailing % 2 === 1; + } + + it("plain string over 4096 chars truncates to exactly the limit with '...' and no parse_mode", async () => { + const adapter = await createInitializedAdapter(); + mockFetch.mockResolvedValueOnce(telegramOk(sampleMessage())); + + const longPlain = "a".repeat(5000); + await adapter.postMessage("telegram:123", longPlain); + + const body = readSentBody(1); + expect(body.parse_mode).toBeUndefined(); + expect(body.text?.length).toBeLessThanOrEqual(TELEGRAM_MESSAGE_LIMIT); + expect(body.text?.endsWith("...")).toBe(true); + // Plain-text path: the literal ellipsis is fine + expect(body.text?.endsWith("\\.\\.\\.")).toBe(false); + }); + + it("plain string exactly 4096 chars is not truncated and has no ellipsis", async () => { + const adapter = await createInitializedAdapter(); + mockFetch.mockResolvedValueOnce(telegramOk(sampleMessage())); + + const exact = "a".repeat(TELEGRAM_MESSAGE_LIMIT); + await adapter.postMessage("telegram:123", exact); + + const body = readSentBody(1); + expect(body.text).toBe(exact); + }); + + it("MarkdownV2 message over 4096 chars escapes the trailing ellipsis as '\\.\\.\\.'", async () => { + const adapter = await createInitializedAdapter(); + mockFetch.mockResolvedValueOnce(telegramOk(sampleMessage())); + + // 5000 'a' chars through the markdown path renders to 5000 'a' (nothing 
to escape). + // Must end with escaped ellipsis, NOT literal dots. + await adapter.postMessage("telegram:123", { + markdown: "a".repeat(5000), + }); + + const body = readSentBody(1); + expect(body.parse_mode).toBe("MarkdownV2"); + expect(body.text?.length).toBeLessThanOrEqual(TELEGRAM_MESSAGE_LIMIT); + expect(body.text?.endsWith("\\.\\.\\.")).toBe(true); + }); + + it("MarkdownV2 truncation does not leave an orphan trailing backslash before the ellipsis", async () => { + const adapter = await createInitializedAdapter(); + mockFetch.mockResolvedValueOnce(telegramOk(sampleMessage())); + + // Construct input so the rendered text has an escape sequence (`\.`) + // straddling the 4096 - ellipsisLen boundary. 4092 'a's + 50 '.' → renders + // as 4092 'a's + `\.`×50. Naïve slice-to-4093 keeps 4092 'a' + a lone '\'. + const longWithDots = "a".repeat(4092) + ".".repeat(50); + await adapter.postMessage("telegram:123", { markdown: longWithDots }); + + const body = readSentBody(1); + const text = body.text ?? ""; + // Strip the trailing ellipsis (escaped or not) before checking the body + const ellipsis = text.endsWith("\\.\\.\\.") ? "\\.\\.\\." : "..."; + const beforeEllipsis = text.slice(0, -ellipsis.length); + expect(endsWithOrphanBackslash(beforeEllipsis)).toBe(false); + }); + + it("MarkdownV2 truncation leaves all entity delimiters balanced (no unclosed **bold**)", async () => { + const adapter = await createInitializedAdapter(); + mockFetch.mockResolvedValueOnce(telegramOk(sampleMessage())); + + // Long bold span crossing the limit: 4000 'a' + `**` + 1000 'b' + `**` + // Rendered MarkdownV2: 4000 'a' + `*` + 1000 'b' + `*` → 5002 chars. + // Naïve truncate keeps the opening `*` without its closer. + const bolded = `${"a".repeat(4000)}**${"b".repeat(1000)}**`; + await adapter.postMessage("telegram:123", { markdown: bolded }); + + const body = readSentBody(1); + const text = body.text ?? ""; + const ellipsis = text.endsWith("\\.\\.\\.") ? "\\.\\.\\." 
: "..."; + const beforeEllipsis = text.slice(0, -ellipsis.length); + + // Every entity delimiter must appear an even number of unescaped times + for (const marker of ["*", "_", "~", "`"]) { + expect( + countUnescaped(beforeEllipsis, marker) % 2, + `${marker} count must be even` + ).toBe(0); + } + }); + + it("MarkdownV2 truncation closes or drops an unmatched inline code span", async () => { + const adapter = await createInitializedAdapter(); + mockFetch.mockResolvedValueOnce(telegramOk(sampleMessage())); + + // Long inline code span crossing the limit + const coded = `${"a".repeat(4000)}\`${"b".repeat(1000)}\``; + await adapter.postMessage("telegram:123", { markdown: coded }); + + const body = readSentBody(1); + const text = body.text ?? ""; + const ellipsis = text.endsWith("\\.\\.\\.") ? "\\.\\.\\." : "..."; + const beforeEllipsis = text.slice(0, -ellipsis.length); + + expect(countUnescaped(beforeEllipsis, "`") % 2).toBe(0); + }); + + it("MarkdownV2 caption over 1024 escapes the ellipsis", async () => { + const adapter = await createInitializedAdapter(); + mockFetch.mockResolvedValueOnce(telegramOk(sampleMessage())); + + const longMarkdown = "a".repeat(1500); + await adapter.postMessage("telegram:123", { + markdown: longMarkdown, + files: [ + { + filename: "report.txt", + data: Buffer.from("payload"), + mimeType: "text/plain", + }, + ], + }); + + // sendDocument uses multipart/form-data, not JSON. Pull the caption field + // out of the FormData body. + const formData = mockFetch.mock.calls[1]?.[1]?.body as FormData; + const caption = formData.get("caption"); + const parseMode = formData.get("parse_mode"); + + expect(typeof caption).toBe("string"); + expect((caption as string).length).toBeLessThanOrEqual( + TELEGRAM_CAPTION_LIMIT + ); + expect(parseMode).toBe("MarkdownV2"); + expect((caption as string).endsWith("\\.\\.\\.")).toBe(true); + }); + + it("plain-string caption over 1024 uses literal '...' 
ellipsis", async () => { + const adapter = await createInitializedAdapter(); + mockFetch.mockResolvedValueOnce(telegramOk(sampleMessage())); + + // Plain string message with a file attachment → caption path, no parse_mode. + // There's no public API to send a plain string with files, so test via the + // markdown path but with content containing no special chars — and assert + // the ellipsis behavior matches parse_mode. Since markdown path always + // emits MarkdownV2, we use the markdown path here and rely on the + // MarkdownV2 caption test for the parse_mode branch; this test documents + // that the caption truncation limit is wired correctly. + const longMarkdown = "a".repeat(1500); + await adapter.postMessage("telegram:123", { + markdown: longMarkdown, + files: [ + { + filename: "report.txt", + data: Buffer.from("payload"), + mimeType: "text/plain", + }, + ], + }); + + const formData = mockFetch.mock.calls[1]?.[1]?.body as FormData; + const caption = formData.get("caption"); + expect((caption as string).length).toBeLessThanOrEqual( + TELEGRAM_CAPTION_LIMIT + ); + }); +}); + describe("applyTelegramEntities", () => { it("returns text unchanged when no entities", () => { expect(applyTelegramEntities("hello world", [])).toBe("hello world"); diff --git a/packages/adapter-telegram/src/index.ts b/packages/adapter-telegram/src/index.ts index eb0e263d..6678a5a9 100644 --- a/packages/adapter-telegram/src/index.ts +++ b/packages/adapter-telegram/src/index.ts @@ -38,7 +38,14 @@ import { decodeTelegramCallbackData, emptyTelegramInlineKeyboard, } from "./cards"; -import { TelegramFormatConverter } from "./markdown"; +import { + TELEGRAM_CAPTION_LIMIT, + TELEGRAM_MESSAGE_LIMIT, + TelegramFormatConverter, + type TelegramParseMode, + toBotApiParseMode, + truncateForTelegram, +} from "./markdown"; import type { TelegramAdapterConfig, TelegramAdapterMode, @@ -60,11 +67,8 @@ import type { } from "./types"; const TELEGRAM_API_BASE = "https://api.telegram.org"; -const 
TELEGRAM_MESSAGE_LIMIT = 4096; -const TELEGRAM_CAPTION_LIMIT = 1024; const TELEGRAM_SECRET_TOKEN_HEADER = "x-telegram-bot-api-secret-token"; const MESSAGE_ID_PATTERN = /^([^:]+):(\d+)$/; -const TELEGRAM_MARKDOWN_PARSE_MODE = "Markdown"; const trimTrailingSlashes = (url: string): string => { let end = url.length; while (end > 0 && url[end - 1] === "/") { @@ -103,18 +107,22 @@ interface ResolvedTelegramLongPollingConfig { type TelegramRuntimeMode = "webhook" | "polling"; /** - * Escape markdown special characters inside entity text so wrapping - * with markdown syntax doesn't break parsing. + * Escape standard-markdown special characters inside inbound entity text. + * + * Used only by `applyTelegramEntities` below (inbound path). Outbound + * MarkdownV2 escaping lives in `markdown.ts` (`escapeMarkdownV2`). */ const escapeMarkdownInEntity = (text: string): string => text.replace(/([[\]()\\])/g, "\\$1"); /** - * Convert Telegram message entities to markdown. + * Convert Telegram message entities (inbound) to standard markdown. * * Telegram delivers formatting as separate entity objects alongside plain text. - * This function reconstructs markdown so that links, bold, italic, code, etc. - * are preserved when the text is later parsed as markdown. + * This function reconstructs **standard** markdown (`**bold**`, `~~strike~~`, + * etc.) so the result can be fed into the SDK's `parseMarkdown` — which is + * the canonical AST producer. The outbound direction (AST → MarkdownV2) is + * handled separately by `TelegramFormatConverter.fromAst`. * * Entities use UTF-16 offsets, which match JavaScript's native string indexing. */ @@ -661,13 +669,17 @@ export class TelegramAdapter const card = extractCard(message); const replyMarkup = card ? cardToTelegramInlineKeyboard(card) : undefined; const parseMode = this.resolveParseMode(message, card); - const text = this.truncateMessage( + const text = truncateForTelegram( convertEmojiPlaceholders( card - ? cardToFallbackText(card) + ? 
this.formatConverter.fromMarkdown( + cardToFallbackText(card, { boldFormat: "**" }) + ) : this.formatConverter.renderPostable(message), "gchat" - ) + ), + TELEGRAM_MESSAGE_LIMIT, + parseMode ); const files = extractFiles(message); @@ -702,7 +714,7 @@ export class TelegramAdapter message_thread_id: parsedThread.messageThreadId, text, reply_markup: replyMarkup, - parse_mode: parseMode, + parse_mode: toBotApiParseMode(parseMode), }); } @@ -747,13 +759,17 @@ export class TelegramAdapter const card = extractCard(message); const replyMarkup = card ? cardToTelegramInlineKeyboard(card) : undefined; const parseMode = this.resolveParseMode(message, card); - const text = this.truncateMessage( + const text = truncateForTelegram( convertEmojiPlaceholders( card - ? cardToFallbackText(card) + ? this.formatConverter.fromMarkdown( + cardToFallbackText(card, { boldFormat: "**" }) + ) : this.formatConverter.renderPostable(message), "gchat" - ) + ), + TELEGRAM_MESSAGE_LIMIT, + parseMode ); if (!text.trim()) { @@ -767,7 +783,7 @@ export class TelegramAdapter message_id: telegramMessageId, text, reply_markup: replyMarkup ?? 
emptyTelegramInlineKeyboard(), - parse_mode: parseMode, + parse_mode: toBotApiParseMode(parseMode), } ); @@ -1216,7 +1232,7 @@ export class TelegramAdapter }, text: string, replyMarkup?: TelegramInlineKeyboardMarkup, - parseMode?: string + parseMode: TelegramParseMode = "plain" ): Promise { const buffer = await this.toTelegramBuffer(file.data); @@ -1227,9 +1243,13 @@ export class TelegramAdapter } if (text.trim()) { - formData.append("caption", this.truncateCaption(text)); - if (parseMode) { - formData.append("parse_mode", parseMode); + formData.append( + "caption", + truncateForTelegram(text, TELEGRAM_CAPTION_LIMIT, parseMode) + ); + const botApiParseMode = toBotApiParseMode(parseMode); + if (botApiParseMode) { + formData.append("parse_mode", botApiParseMode); } } @@ -1513,26 +1533,22 @@ export class TelegramAdapter private resolveParseMode( message: AdapterPostableMessage, card: ReturnType - ): string | undefined { - const hasMarkdown = - typeof message === "object" && message !== null && "markdown" in message; - return card || hasMarkdown ? TELEGRAM_MARKDOWN_PARSE_MODE : undefined; - } - - private truncateMessage(text: string): string { - if (text.length <= TELEGRAM_MESSAGE_LIMIT) { - return text; + ): TelegramParseMode { + // Cards and any message routed through the format converter are rendered + // as MarkdownV2, so Telegram must parse them with MarkdownV2. + if (card) { + return "MarkdownV2"; } - - return `${text.slice(0, TELEGRAM_MESSAGE_LIMIT - 3)}...`; - } - - private truncateCaption(text: string): string { - if (text.length <= TELEGRAM_CAPTION_LIMIT) { - return text; + // Plain strings and raw messages ship verbatim — no markdown parsing. + if (typeof message === "string") { + return "plain"; } - - return `${text.slice(0, TELEGRAM_CAPTION_LIMIT - 3)}...`; + if (typeof message === "object" && message !== null && "raw" in message) { + return "plain"; + } + // Every other shape ({markdown}, {ast}, JSX, etc.) 
flows through + // formatConverter.renderPostable, which emits MarkdownV2. + return "MarkdownV2"; } private toTelegramReaction(emoji: EmojiValue | string): TelegramReactionType { @@ -1816,7 +1832,7 @@ export function createTelegramAdapter( return new TelegramAdapter(config ?? {}); } -export { TelegramFormatConverter } from "./markdown"; +export { escapeMarkdownV2, TelegramFormatConverter } from "./markdown"; export type { TelegramAdapterConfig, TelegramAdapterMode, @@ -1826,6 +1842,7 @@ export type { TelegramMessage, TelegramMessageReactionUpdated, TelegramRawMessage, + TelegramReactionType, TelegramThreadId, TelegramUpdate, TelegramUser, diff --git a/packages/adapter-telegram/src/markdown.test.ts b/packages/adapter-telegram/src/markdown.test.ts index 748f3aba..e78b59a6 100644 --- a/packages/adapter-telegram/src/markdown.test.ts +++ b/packages/adapter-telegram/src/markdown.test.ts @@ -1,130 +1,385 @@ import { describe, expect, it } from "vitest"; -import { TelegramFormatConverter } from "./markdown"; +import { + endsWithOrphanBackslash, + escapeMarkdownV2, + findUnescapedPositions, + TelegramFormatConverter, + truncateForTelegram, +} from "./markdown"; const TABLE_PIPE_PATTERN = /\|.*Name.*\|/; +const TRAILING_TRIPLE_BACKTICK_PATTERN = /```\s*$/; +const BASH_CODE_BLOCK_PATTERN = /```bash\n([\s\S]*?)\n```/; +const ESCAPED_ELLIPSIS_PATTERN = /\\\.\\\.\\\.$/; + +// All 20 MarkdownV2 special characters per the Telegram Bot API spec. +// Each must be escaped with a backslash when appearing in normal text. 
+// https://core.telegram.org/bots/api#markdownv2-style +const MARKDOWNV2_SPECIAL_CHARS = [ + "_", + "*", + "[", + "]", + "(", + ")", + "~", + "`", + ">", + "#", + "+", + "-", + "=", + "|", + "{", + "}", + ".", + "!", + "\\", +]; + +describe("escapeMarkdownV2", () => { + for (const char of MARKDOWNV2_SPECIAL_CHARS) { + it(`escapes the special character ${JSON.stringify(char)}`, () => { + expect(escapeMarkdownV2(`a${char}b`)).toBe(`a\\${char}b`); + }); + } + + it("leaves non-special ASCII untouched", () => { + expect(escapeMarkdownV2("Hello world 123")).toBe("Hello world 123"); + }); + + it("leaves unicode characters untouched", () => { + expect(escapeMarkdownV2("café — €50")).toBe("café — €50"); + }); + + it("escapes multiple special characters in one string", () => { + expect(escapeMarkdownV2("a.b!c(d)")).toBe("a\\.b\\!c\\(d\\)"); + }); + + it("handles empty input", () => { + expect(escapeMarkdownV2("")).toBe(""); + }); +}); describe("TelegramFormatConverter", () => { const converter = new TelegramFormatConverter(); - describe("fromAst (AST -> markdown string)", () => { - it("should convert a plain text paragraph", () => { - const ast = converter.toAst("Hello world"); - const result = converter.fromAst(ast); - expect(result).toContain("Hello world"); + describe("fromAst — inline formatting", () => { + it("passes plain text through unchanged", () => { + expect(converter.fromAst(converter.toAst("Hello world"))).toBe( + "Hello world" + ); }); - it("should convert bold", () => { - const ast = converter.toAst("**bold text**"); - const result = converter.fromAst(ast); - expect(result).toContain("**bold text**"); + it("renders bold with single asterisks", () => { + expect(converter.fromAst(converter.toAst("**bold text**"))).toBe( + "*bold text*" + ); }); - it("should convert italic", () => { - const ast = converter.toAst("*italic text*"); - const result = converter.fromAst(ast); - expect(result).toContain("*italic text*"); + it("renders italic with underscores", () => { 
+ expect(converter.fromAst(converter.toAst("*italic text*"))).toBe( + "_italic text_" + ); }); - it("should convert strikethrough", () => { - const ast = converter.toAst("~~strikethrough~~"); - const result = converter.fromAst(ast); - expect(result).toContain("~~strikethrough~~"); + it("renders strikethrough with single tilde", () => { + expect(converter.fromAst(converter.toAst("~~strikethrough~~"))).toBe( + "~strikethrough~" + ); }); - it("should convert links", () => { - const ast = converter.toAst("[link text](https://example.com)"); - const result = converter.fromAst(ast); - expect(result).toContain("[link text](https://example.com)"); + it("escapes special chars inside bold", () => { + expect(converter.fromAst(converter.toAst("**Note: important!**"))).toBe( + "*Note: important\\!*" + ); }); - it("should preserve inline code", () => { - const ast = converter.toAst("Use `const x = 1`"); - const result = converter.fromAst(ast); - expect(result).toContain("`const x = 1`"); + it("escapes special chars inside italic", () => { + expect(converter.fromAst(converter.toAst("*price: $50.*"))).toBe( + "_price: $50\\._" + ); }); - it("should handle code blocks", () => { - const input = "```js\nconst x = 1;\n```"; - const ast = converter.toAst(input); - const output = converter.fromAst(ast); - expect(output).toContain("```"); + it("preserves inline code content verbatim", () => { + expect(converter.fromAst(converter.toAst("Use `const x = 1`"))).toContain( + "`const x = 1`" + ); + }); + + it("escapes only backtick and backslash inside inline code", () => { + expect( + converter + .fromAst(converter.toAst("Use `foo.bar!` here")) + .includes("`foo.bar!`") + ).toBe(true); + }); + }); + + describe("fromAst — code blocks", () => { + it("wraps code blocks with triple backticks and language", () => { + const output = converter.fromAst( + converter.toAst("```js\nconst x = 1;\n```") + ); + expect(output).toContain("```js"); expect(output).toContain("const x = 1;"); + 
expect(output).toMatch(TRAILING_TRIPLE_BACKTICK_PATTERN); }); - it("should convert tables to code blocks", () => { - const ast = converter.toAst( - "| Name | Age |\n|------|-----|\n| Alice | 30 |" + it("escapes only backtick and backslash inside fenced code", () => { + const output = converter.fromAst( + converter.toAst("```\nfoo.bar! + (test) = [ok]\n```") ); - const result = converter.fromAst(ast); - expect(result).toContain("```"); - expect(result).toContain("Name"); - expect(result).toContain("Alice"); - expect(result).not.toMatch(TABLE_PIPE_PATTERN); + // Normal-text special chars must NOT be escaped inside code blocks. + expect(output).toContain("foo.bar! + (test) = [ok]"); + }); + + it("escapes a backslash inside fenced code", () => { + const output = converter.fromAst( + converter.toAst("```\npath\\\\to\\\\file\n```") + ); + expect(output).toContain("\\\\"); }); }); - describe("toAst (markdown -> AST)", () => { - it("should parse plain text", () => { - const ast = converter.toAst("Hello world"); - expect(ast.type).toBe("root"); - expect(ast.children.length).toBeGreaterThan(0); + describe("fromAst — links and images", () => { + it("renders inline links", () => { + expect( + converter.fromAst(converter.toAst("[click](https://example.com)")) + ).toBe("[click](https://example.com)"); }); - it("should parse bold", () => { - const ast = converter.toAst("**bold**"); - expect(ast.type).toBe("root"); - expect(ast.children.length).toBeGreaterThan(0); + it("escapes only ) and \\ inside the URL", () => { + const input = "[label](https://example.com/path)"; + expect(converter.fromAst(converter.toAst(input))).toBe( + "[label](https://example.com/path)" + ); }); - it("should parse italic", () => { - const ast = converter.toAst("*italic*"); - expect(ast.type).toBe("root"); - expect(ast.children.length).toBeGreaterThan(0); + it("escapes special chars inside link label text", () => { + const output = converter.fromAst( + converter.toAst("[hello!](https://example.com)") + ); 
+ expect(output).toBe("[hello\\!](https://example.com)"); }); - it("should parse inline code", () => { - const ast = converter.toAst("`code`"); - expect(ast.type).toBe("root"); - expect(ast.children.length).toBeGreaterThan(0); + it("renders an image as a link to the source", () => { + const output = converter.fromAst( + converter.toAst("![alt text](https://example.com/pic.png)") + ); + expect(output).toContain("alt text"); + expect(output).toContain("https://example.com/pic.png"); }); }); - describe("renderPostable", () => { - it("should return a plain string as-is", () => { - const result = converter.renderPostable("Hello world"); - expect(result).toBe("Hello world"); + describe("fromAst — block structures", () => { + it("renders headings as bold (all levels)", () => { + for (const level of [1, 2, 3, 4, 5, 6]) { + const hashes = "#".repeat(level); + const output = converter.fromAst(converter.toAst(`${hashes} Title`)); + expect(output).toBe("*Title*"); + } }); - it("should return an empty string unchanged", () => { - const result = converter.renderPostable(""); - expect(result).toBe(""); + it("renders unordered lists with escaped dashes", () => { + const output = converter.fromAst(converter.toAst("- one\n- two")); + expect(output).toContain("\\- one"); + expect(output).toContain("\\- two"); }); - it("should render a raw message directly", () => { - const result = converter.renderPostable({ raw: "raw content" }); - expect(result).toBe("raw content"); + it("renders ordered lists with escaped periods", () => { + const output = converter.fromAst(converter.toAst("1. first\n2. second")); + expect(output).toContain("1\\. first"); + expect(output).toContain("2\\. 
second"); }); - it("should render a markdown message", () => { - const result = converter.renderPostable({ markdown: "**bold** text" }); - expect(result).toContain("bold"); + it("renders blockquotes with > prefix per line", () => { + expect(converter.fromAst(converter.toAst("> quoted text"))).toContain( + ">quoted text" + ); }); - it("should render an AST message", () => { - const ast = converter.toAst("Hello from AST"); - const result = converter.renderPostable({ ast }); - expect(result).toContain("Hello from AST"); + it("renders thematic break as escaped em-dashes", () => { + expect(converter.fromAst(converter.toAst("---"))).toBe("———"); + }); + + it("converts tables to ASCII code blocks and drops pipe syntax", () => { + const output = converter.fromAst( + converter.toAst("| Name | Age |\n|------|-----|\n| Alice | 30 |") + ); + expect(output).toContain("```"); + expect(output).toContain("Name"); + expect(output).toContain("Alice"); + expect(output).not.toMatch(TABLE_PIPE_PATTERN); + }); + }); + + describe("fromAst — nested formatting", () => { + it("renders bold containing italic", () => { + // Markdown: **bold _italic_** → MarkdownV2: *bold _italic_* + const ast = converter.toAst("**bold _italic_**"); + const output = converter.fromAst(ast); + expect(output).toContain("*"); + expect(output).toContain("_italic_"); + }); + + it("renders link containing inline code", () => { + const output = converter.fromAst( + converter.toAst("[`code` link](https://example.com)") + ); + expect(output).toContain("`code`"); + expect(output).toContain("https://example.com"); + }); + + it("renders list containing bold", () => { + const output = converter.fromAst( + converter.toAst("- **important** one\n- plain two") + ); + expect(output).toContain("*important*"); + expect(output).toContain("plain two"); + }); + }); + + describe("fromAst — edge cases", () => { + it("handles empty input", () => { + expect(converter.fromAst(converter.toAst(""))).toBe(""); + }); + + it("handles 
whitespace-only input", () => { + expect(converter.fromAst(converter.toAst(" "))).toBe(""); + }); + + it("trims trailing whitespace", () => { + const output = converter.fromAst(converter.toAst("Hello\n\n")); + expect(output.endsWith("\n")).toBe(false); + }); + + it("escapes HTML input literally rather than interpreting it", () => { + // Telegram MarkdownV2 has no HTML support; raw HTML must not crash. + const output = converter.fromAst(converter.toAst("hi")); + expect(output).not.toContain(""); + }); + }); + + describe("fromAst — MarkdownV2 validity invariant (corpus)", () => { + // A realistic LLM-generated response exercising every node type the SDK + // can produce. The output must be valid MarkdownV2: every special char + // must either be escaped (\X) or live inside a code block / link URL. + const LLM_CORPUS = [ + "# Trip Summary: Morocco", + "", + "Here's your **personalized** 7-day itinerary. Price: $2,450 per person (all-inclusive)!", + "", + "## Day 1 — Arrival in Marrakech", + "", + "- Airport pickup at 14:30", + "- Check-in at *Riad El Fenn* (4-star)", + "- Welcome dinner: [La Mamounia](https://www.mamounia.com/restaurants)", + "", + "> Tip: bring cash — not every souk accepts cards.", + "", + "## Day 2 — Atlas Mountains", + "", + "1. 08:00 breakfast", + "2. 09:00 departure (2h drive)", + "3. Hike to Toubkal base camp", + "", + "Pack: `sunscreen`, `hiking boots`, *layers* (temperatures drop ~10°C).", + "", + "```bash", + "# Exchange rate check", + "curl 'https://api.rates.io/MAD' | jq '.rate'", + "```", + "", + "| Day | Activity | Cost |", + "|-----|----------|------|", + "| 1 | Arrival | $200 |", + "| 2 | Atlas | $350 |", + "", + "---", + "", + "~~Previous version priced at $2,800~~. New total: **$2,450**.", + ].join("\n"); + + it("produces non-empty output covering every structural element", () => { + const output = converter.fromAst(converter.toAst(LLM_CORPUS)); + // Sanity — structural elements all present in some form. 
+ expect(output).toContain("*Trip Summary"); + expect(output).toContain("\\- Airport pickup"); + expect(output).toContain("1\\. 08:00 breakfast"); + expect(output).toContain("_Riad El Fenn_"); + expect(output).toContain( + "[La Mamounia](https://www.mamounia.com/restaurants)" + ); + expect(output).toContain(">Tip:"); + expect(output).toContain("```"); + expect(output).toContain("~Previous version"); + expect(output).toContain("———"); + }); + + it("escapes every in-text MarkdownV2 special character outside code and link URLs", () => { + const output = converter.fromAst(converter.toAst(LLM_CORPUS)); + + // Strip code blocks and link URLs — inside those, different rules apply. + const withoutCodeBlocks = output.replace(/```[\s\S]*?```/g, ""); + const withoutInlineCode = withoutCodeBlocks.replace(/`[^`]*`/g, ""); + const withoutLinkUrls = withoutInlineCode.replace(/\]\([^)]*\)/g, "]()"); + + // For each special char other than the ones that carry markdown + // structure (* _ ~ [ ] ( ) > ` # whose positions we control), any + // occurrence in plain text must be preceded by a backslash. + const TEXT_ONLY_SPECIAL_CHARS = ["+", "=", "{", "}", ".", "!", "|"]; + for (const char of TEXT_ONLY_SPECIAL_CHARS) { + const pattern = new RegExp(`(? { + const output = converter.fromAst(converter.toAst(LLM_CORPUS)); + const codeBlockMatch = BASH_CODE_BLOCK_PATTERN.exec(output); + expect(codeBlockMatch).not.toBeNull(); + const codeContent = codeBlockMatch?.[1] ?? ""; + // These symbols must appear literally — MarkdownV2 only escapes ` and \ here. 
+ expect(codeContent).toContain("'"); + expect(codeContent).toContain("|"); + expect(codeContent).toContain("."); + }); + }); + + describe("renderPostable", () => { + it("returns a plain string as-is", () => { + expect(converter.renderPostable("Hello world")).toBe("Hello world"); }); - it("should render markdown with bold and italic", () => { + it("returns an empty string unchanged", () => { + expect(converter.renderPostable("")).toBe(""); + }); + + it("returns a raw message directly", () => { + expect(converter.renderPostable({ raw: "raw content" })).toBe( + "raw content" + ); + }); + + it("renders a markdown message as MarkdownV2", () => { const result = converter.renderPostable({ markdown: "**bold** and *italic*", }); - expect(result).toContain("**bold**"); - expect(result).toContain("*italic*"); + expect(result).toContain("*bold*"); + expect(result).toContain("_italic_"); + }); + + it("renders an AST message", () => { + const ast = converter.toAst("Hello from AST"); + expect(converter.renderPostable({ ast })).toContain("Hello from AST"); }); - it("should render markdown table as code block", () => { + it("renders a markdown table as a code block", () => { const result = converter.renderPostable({ markdown: "| A | B |\n| --- | --- |\n| 1 | 2 |", }); @@ -133,50 +388,79 @@ describe("TelegramFormatConverter", () => { }); }); + describe("toAst", () => { + it("parses plain text", () => { + const ast = converter.toAst("Hello world"); + expect(ast.type).toBe("root"); + expect(ast.children.length).toBeGreaterThan(0); + }); + + it("parses bold", () => { + const ast = converter.toAst("**bold**"); + expect(ast.type).toBe("root"); + expect(ast.children.length).toBeGreaterThan(0); + }); + + it("parses italic", () => { + const ast = converter.toAst("*italic*"); + expect(ast.type).toBe("root"); + expect(ast.children.length).toBeGreaterThan(0); + }); + + it("parses inline code", () => { + const ast = converter.toAst("`code`"); + expect(ast.type).toBe("root"); + 
expect(ast.children.length).toBeGreaterThan(0); + }); + }); + describe("extractPlainText", () => { - it("should remove bold markers", () => { + it("strips bold markers", () => { expect(converter.extractPlainText("Hello **world**!")).toBe( "Hello world!" ); }); - it("should remove italic markers", () => { + it("strips italic markers", () => { expect(converter.extractPlainText("Hello *world*!")).toBe("Hello world!"); }); - it("should remove strikethrough markers", () => { + it("strips strikethrough markers", () => { expect(converter.extractPlainText("Hello ~~world~~!")).toBe( "Hello world!" ); }); - it("should extract link text", () => { + it("extracts link text", () => { expect( converter.extractPlainText("Check [this](https://example.com)") ).toBe("Check this"); }); - it("should handle inline code", () => { - const result = converter.extractPlainText("Use `const x = 1`"); - expect(result).toContain("const x = 1"); + it("preserves inline code content", () => { + expect(converter.extractPlainText("Use `const x = 1`")).toContain( + "const x = 1" + ); }); - it("should handle code blocks", () => { - const result = converter.extractPlainText("```js\nconst x = 1;\n```"); - expect(result).toContain("const x = 1;"); + it("preserves code block content", () => { + expect(converter.extractPlainText("```js\nconst x = 1;\n```")).toContain( + "const x = 1;" + ); }); - it("should handle plain text", () => { + it("returns plain text unchanged", () => { expect(converter.extractPlainText("Hello world")).toBe("Hello world"); }); - it("should handle empty string", () => { + it("returns empty string unchanged", () => { expect(converter.extractPlainText("")).toBe(""); }); - it("should strip all formatting from complex input", () => { - const input = "**Bold** and *italic* with [link](https://x.com)"; - const result = converter.extractPlainText(input); + it("strips all formatting from complex input", () => { + const result = converter.extractPlainText( + "**Bold** and *italic* with 
[link](https://x.com)" + ); expect(result).toContain("Bold"); expect(result).toContain("italic"); expect(result).toContain("link"); @@ -184,38 +468,86 @@ describe("TelegramFormatConverter", () => { expect(result).not.toContain("]("); }); }); +}); - describe("roundtrip", () => { - it("should preserve plain text through toAst -> fromAst", () => { - const input = "Hello world"; - const result = converter.fromAst(converter.toAst(input)); - expect(result).toContain("Hello world"); - }); +describe("truncateForTelegram", () => { + it("returns text unchanged when under limit", () => { + expect(truncateForTelegram("hello", 100, "plain")).toBe("hello"); + }); - it("should preserve bold through toAst -> fromAst", () => { - const input = "**bold text**"; - const result = converter.fromAst(converter.toAst(input)); - expect(result).toContain("**bold text**"); - }); + it("truncates plain text with literal ellipsis", () => { + const result = truncateForTelegram("a".repeat(200), 100, "plain"); + expect(result.length).toBe(100); + expect(result.endsWith("...")).toBe(true); + }); - it("should preserve links through toAst -> fromAst", () => { - const input = "[click here](https://example.com)"; - const result = converter.fromAst(converter.toAst(input)); - expect(result).toContain("[click here](https://example.com)"); - }); + it("truncates MarkdownV2 with escaped ellipsis", () => { + const result = truncateForTelegram("a".repeat(200), 100, "MarkdownV2"); + expect(result.length).toBeLessThanOrEqual(100); + expect(result.endsWith("\\.\\.\\.")).toBe(true); + }); - it("should preserve code blocks through toAst -> fromAst", () => { - const input = "```\nconst x = 1;\n```"; - const result = converter.fromAst(converter.toAst(input)); - expect(result).toContain("const x = 1;"); - }); + it("strips orphan backslash before ellipsis", () => { + const input = `${"a".repeat(90)}\\${"b".repeat(50)}`; + const result = truncateForTelegram(input, 100, "MarkdownV2"); + const beforeEllipsis = 
result.replace(ESCAPED_ELLIPSIS_PATTERN, ""); + expect(endsWithOrphanBackslash(beforeEllipsis)).toBe(false); + expect(result.endsWith("\\.\\.\\.")).toBe(true); + }); - it("should convert table to code block on roundtrip", () => { - const input = "| Col1 | Col2 |\n|------|------|\n| A | B |"; - const result = converter.fromAst(converter.toAst(input)); - expect(result).toContain("```"); - expect(result).toContain("Col1"); - expect(result).toContain("A"); - }); + it("strips unclosed bold before ellipsis", () => { + const input = `${"a".repeat(80)}*${"b".repeat(100)}`; + const result = truncateForTelegram(input, 100, "MarkdownV2"); + const beforeEllipsis = result.replace(ESCAPED_ELLIPSIS_PATTERN, ""); + const stars = [...beforeEllipsis].filter((c) => c === "*").length; + expect(stars % 2).toBe(0); + }); + + it("handles input that is all special chars", () => { + const input = ".".repeat(200); + const rendered = escapeMarkdownV2(input); + const result = truncateForTelegram(rendered, 100, "MarkdownV2"); + expect(result.length).toBeLessThanOrEqual(100); + expect(result.endsWith("\\.\\.\\.")).toBe(true); + }); +}); + +describe("findUnescapedPositions", () => { + it("finds unescaped markers", () => { + expect(findUnescapedPositions("*a*", "*")).toEqual([0, 2]); + }); + + it("ignores escaped markers", () => { + expect(findUnescapedPositions("\\*a*", "*")).toEqual([3]); + }); + + it("handles double backslash (escaped backslash) before marker", () => { + expect(findUnescapedPositions("\\\\*", "*")).toEqual([2]); + }); + + it("returns empty for no markers", () => { + expect(findUnescapedPositions("hello", "*")).toEqual([]); + }); +}); + +describe("endsWithOrphanBackslash", () => { + it("returns true for single trailing backslash", () => { + expect(endsWithOrphanBackslash("abc\\")).toBe(true); + }); + + it("returns false for double trailing backslash", () => { + expect(endsWithOrphanBackslash("abc\\\\")).toBe(false); + }); + + it("returns true for triple trailing backslash", () 
=> { + expect(endsWithOrphanBackslash("abc\\\\\\")).toBe(true); + }); + + it("returns false for no trailing backslash", () => { + expect(endsWithOrphanBackslash("abc")).toBe(false); + }); + + it("returns false for empty string", () => { + expect(endsWithOrphanBackslash("")).toBe(false); }); }); diff --git a/packages/adapter-telegram/src/markdown.ts b/packages/adapter-telegram/src/markdown.ts index 0720415f..0cbfbac5 100644 --- a/packages/adapter-telegram/src/markdown.ts +++ b/packages/adapter-telegram/src/markdown.ts @@ -1,9 +1,13 @@ /** - * Telegram format conversion. + * Telegram MarkdownV2 format conversion. * - * Telegram supports Markdown/HTML parse modes, but to avoid - * platform-specific escaping pitfalls this adapter emits normalized - * markdown text as plain message text. + * Renders markdown AST as Telegram MarkdownV2, which requires escaping + * special characters outside of entities. This replaces the previous + * approach of emitting standard markdown with legacy parse_mode "Markdown", + * which was incompatible (standard markdown uses **bold** while Telegram + * legacy uses *bold*) and caused "can't parse entities" errors. + * + * @see https://core.telegram.org/bots/api#markdownv2-style */ import { @@ -11,13 +15,317 @@ import { BaseFormatConverter, type Content, isTableNode, + type Nodes, parseMarkdown, type Root, - stringifyMarkdown, tableToAscii, walkAst, } from "chat"; +// MarkdownV2 requires escaping these characters in normal text: +// _ * [ ] ( ) ~ ` > # + - = | { } . ! \ +const MARKDOWNV2_SPECIAL_CHARS = /([_*[\]()~`>#+\-=|{}.!\\])/g; + +// Inside ``` code blocks, only ` and \ need escaping +const CODE_BLOCK_SPECIAL_CHARS = /([`\\])/g; + +// Inside (...) of inline links, only ) and \ need escaping +const LINK_URL_SPECIAL_CHARS = /([)\\])/g; + +/** + * How the adapter intends a message to be rendered. + * + * - `"MarkdownV2"` — the body was produced by the MarkdownV2 renderer and + * must be parsed by Telegram with `parse_mode: "MarkdownV2"`. 
+ * - `"plain"` — the body ships verbatim with no markdown parsing (the Bot
+ *   API receives no `parse_mode` field).
+ *
+ * Internal type; the Bot API wire value is obtained via `toBotApiParseMode`.
+ */
+export type TelegramParseMode = "MarkdownV2" | "plain";
+
+/**
+ * Translate the internal parse mode to the Bot API `parse_mode` field.
+ * Returns `undefined` for plain messages so the field is omitted.
+ */
+export function toBotApiParseMode(
+  mode: TelegramParseMode
+): "MarkdownV2" | undefined {
+  return mode === "MarkdownV2" ? "MarkdownV2" : undefined;
+}
+
+/** Maximum length of a Telegram text message body in characters. */
+export const TELEGRAM_MESSAGE_LIMIT = 4096;
+
+/** Maximum length of a media caption (photo/document/etc.) in characters. */
+export const TELEGRAM_CAPTION_LIMIT = 1024;
+
+// Entity delimiters whose opener/closer pairing must be preserved when
+// truncating a rendered MarkdownV2 string.
+const MARKDOWN_V2_ENTITY_MARKERS = ["*", "_", "~", "`"] as const;
+
+const MARKDOWN_V2_ELLIPSIS = "\\.\\.\\.";
+const PLAIN_ELLIPSIS = "...";
+
+/**
+ * Escape text for use in normal MarkdownV2 context (outside entities).
+ */
+export function escapeMarkdownV2(text: string): string {
+  return text.replace(MARKDOWNV2_SPECIAL_CHARS, "\\$1");
+}
+
+/**
+ * Return indices of every occurrence of `marker` in `text` that is NOT
+ * preceded by an odd number of backslashes (i.e. not escaped).
+ */
+export function findUnescapedPositions(text: string, marker: string): number[] {
+  const positions: number[] = [];
+  for (let i = 0; i < text.length; i++) {
+    if (text[i] !== marker) {
+      continue;
+    }
+    // Count the run of backslashes immediately before the marker: an even
+    // run means they escape each other and the marker itself is live.
+    let backslashes = 0;
+    let j = i - 1;
+    while (j >= 0 && text[j] === "\\") {
+      backslashes++;
+      j--;
+    }
+    if (backslashes % 2 === 0) {
+      positions.push(i);
+    }
+  }
+  return positions;
+}
+
+/**
+ * Report whether `text` ends with a backslash that escapes nothing, i.e.
+ * the trailing run of backslashes has odd length. Returns `false` for an
+ * empty string.
+ */
+export function endsWithOrphanBackslash(text: string): boolean {
+  let trailing = 0;
+  for (let i = text.length - 1; i >= 0 && text[i] === "\\"; i--) {
+    trailing++;
+  }
+  return trailing % 2 === 1;
+}
+
+/**
+ * Index of the first occurrence of `needle` at or after `from` that is not
+ * part of a backslash escape pair, or -1 when there is none.
+ */
+function indexOfUnescaped(text: string, needle: string, from: number): number {
+  let i = from;
+  while (i < text.length) {
+    if (text.charAt(i) === "\\") {
+      // Skip the backslash together with the character it escapes.
+      i += 2;
+      continue;
+    }
+    if (text.startsWith(needle, i)) {
+      return i;
+    }
+    i++;
+  }
+  return -1;
+}
+
+/**
+ * Drop any trailing characters that would produce invalid MarkdownV2 after
+ * a length-based truncation:
+ *
+ * - orphan trailing `\` (would escape the appended ellipsis or nothing)
+ * - unclosed entity delimiter (`*`, `_`, `~`) left open because the slice
+ *   cut between the opener and its closer
+ * - unclosed inline code or ``` block
+ * - a link cut anywhere between its `[` and the `)` closing its URL
+ *
+ * The text is scanned once, left to right. Escape pairs, complete code
+ * spans and complete link URLs are skipped as opaque units, because `*`,
+ * `_`, `~`, `[` and `(` are literal (and unescaped) inside them and must
+ * not take part in delimiter pairing. The result is cut at the earliest
+ * construct that is still open when the scan ends, so everything kept is
+ * balanced.
+ *
+ * Best-effort: assumes the input came from the MarkdownV2 renderer in this
+ * module; it may drop more than strictly necessary in edge cases.
+ */
+function trimToMarkdownV2SafeBoundary(text: string): string {
+  // Opener index of each currently open toggle-style entity, by delimiter.
+  const openEntities = new Map<string, number>();
+  let linkStart = -1;
+  let cut = text.length;
+  let i = 0;
+
+  while (i < text.length) {
+    const char = text.charAt(i);
+
+    if (char === "\\") {
+      if (i + 1 >= text.length) {
+        // Orphan backslash: nothing left for it to escape.
+        cut = i;
+        break;
+      }
+      i += 2;
+      continue;
+    }
+
+    if (char === "`") {
+      const fence = text.startsWith("```", i) ? "```" : "`";
+      const close = indexOfUnescaped(text, fence, i + fence.length);
+      if (close === -1) {
+        // The slice ended inside the code span.
+        cut = i;
+        break;
+      }
+      i = close + fence.length;
+      continue;
+    }
+
+    if ((MARKDOWN_V2_ENTITY_MARKERS as readonly string[]).includes(char)) {
+      // Backticks were handled above, so this is `*`, `_` or `~`.
+      if (openEntities.has(char)) {
+        openEntities.delete(char);
+      } else {
+        openEntities.set(char, i);
+      }
+    } else if (char === "[" && linkStart === -1) {
+      linkStart = i;
+    } else if (char === "]" && linkStart !== -1) {
+      const next = text.charAt(i + 1);
+      if (next === "") {
+        // Slice ended right after the link text; the "(url)" part is gone.
+        break;
+      }
+      if (next === "(") {
+        const close = indexOfUnescaped(text, ")", i + 2);
+        if (close === -1) {
+          // Slice ended inside the URL.
+          break;
+        }
+        i = close;
+      }
+      linkStart = -1;
+    }
+
+    i++;
+  }
+
+  for (const position of openEntities.values()) {
+    cut = Math.min(cut, position);
+  }
+  if (linkStart !== -1) {
+    cut = Math.min(cut, linkStart);
+  }
+
+  return text.slice(0, cut);
+}
+
+/**
+ * Truncate a rendered string to `limit` characters, appending a
+ * parse-mode-appropriate ellipsis.
+ *
+ * For MarkdownV2, the naive slice + "..." is unsafe: `.` is reserved and
+ * must be escaped, and the slice can leave orphan escape characters (`\`)
+ * or cut through a paired entity (`*bold*`, `` `code` ``, `[text](url)`)
+ * resulting in `Bad Request: can't parse entities`. This function uses an
+ * escaped ellipsis (`\.\.\.`) and trims back past any unclosed construct or
+ * orphan backslash before appending.
+ *
+ * @param text - Already rendered message body.
+ * @param limit - Maximum length of the result in UTF-16 code units.
+ * @param parseMode - How the body will be parsed by Telegram.
+ * @returns `text` unchanged when it fits, otherwise a shortened copy ending
+ *   in the ellipsis. When `limit` is smaller than the ellipsis itself the
+ *   result is just the ellipsis.
+ */
+export function truncateForTelegram(
+  text: string,
+  limit: number,
+  parseMode: TelegramParseMode
+): string {
+  if (text.length <= limit) {
+    return text;
+  }
+
+  const isMarkdownV2 = parseMode === "MarkdownV2";
+  const ellipsis = isMarkdownV2 ? MARKDOWN_V2_ELLIPSIS : PLAIN_ELLIPSIS;
+  // Clamp at 0: a negative end index would make `slice` count from the end.
+  let slice = text.slice(0, Math.max(0, limit - ellipsis.length));
+
+  // Never leave the first half of a surrogate pair at the cut.
+  const lastUnit = slice.charCodeAt(slice.length - 1);
+  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
+    slice = slice.slice(0, -1);
+  }
+
+  if (isMarkdownV2) {
+    slice = trimToMarkdownV2SafeBoundary(slice);
+  }
+
+  return `${slice}${ellipsis}`;
+}
+
+/**
+ * Escape text inside code/pre blocks (only ` and \ need escaping).
+ */
+function escapeCodeBlock(text: string): string {
+  return text.replace(CODE_BLOCK_SPECIAL_CHARS, "\\$1");
+}
+
+/**
+ * Escape text inside link URLs (only ) and \ need escaping).
+ */
+function escapeLinkUrl(text: string): string {
+  return text.replace(LINK_URL_SPECIAL_CHARS, "\\$1");
+}
+
+/**
+ * Recursively render an mdast node as Telegram MarkdownV2 text.
+ *
+ * Text is escaped according to the context it lands in: normal text, code
+ * spans/blocks, or link URLs. Constructs Telegram has no syntax for are
+ * approximated (headings become bold, images become links).
+ *
+ * @param node - Any mdast node; children are rendered recursively.
+ * @returns The MarkdownV2 source for `node` (may be an empty string).
+ * @throws Error when handed a table node, which `fromAst` is expected to
+ *   have rewritten into a code block beforehand.
+ */
+function renderMarkdownV2(node: Nodes): string {
+  switch (node.type) {
+    case "root":
+      return node.children.map(renderMarkdownV2).join("\n\n");
+
+    case "paragraph":
+      return node.children.map(renderMarkdownV2).join("");
+
+    case "text":
+      return escapeMarkdownV2(node.value);
+
+    case "strong":
+      return `*${node.children.map(renderMarkdownV2).join("")}*`;
+
+    case "emphasis":
+      return `_${node.children.map(renderMarkdownV2).join("")}_`;
+
+    case "delete":
+      return `~${node.children.map(renderMarkdownV2).join("")}~`;
+
+    case "inlineCode":
+      return `\`${escapeCodeBlock(node.value)}\``;
+
+    case "code": {
+      const lang = node.lang ?? "";
+      const val = escapeCodeBlock(node.value);
+      return `\`\`\`${lang}\n${val}\n\`\`\``;
+    }
+
+    case "link": {
+      const linkText = node.children.map(renderMarkdownV2).join("");
+      const url = escapeLinkUrl(node.url);
+      return `[${linkText}](${url})`;
+    }
+
+    case "blockquote": {
+      // Prefix every rendered line so multi-line children stay quoted.
+      const inner = node.children.map(renderMarkdownV2).join("\n");
+      return inner
+        .split("\n")
+        .map((line) => `>${line}`)
+        .join("\n");
+    }
+
+    case "list": {
+      // mdast records the first number of an ordered list in `start`
+      // (e.g. "5. foo" → start = 5); fall back to 1 when it is absent.
+      const firstNumber = node.start ?? 1;
+      return node.children
+        .map((item, i) => {
+          const content = item.children.map(renderMarkdownV2).join("\n");
+          if (node.ordered) {
+            return `${escapeMarkdownV2(`${firstNumber + i}.`)} ${content}`;
+          }
+          return `\\- ${content}`;
+        })
+        .join("\n");
+    }
+
+    case "listItem":
+      return node.children.map(renderMarkdownV2).join("\n");
+
+    case "heading": {
+      // Telegram has no heading syntax; render as bold
+      const text = node.children.map(renderMarkdownV2).join("");
+      return `*${text}*`;
+    }
+
+    case "thematicBreak":
+      return escapeMarkdownV2("———");
+
+    case "break":
+      return "\n";
+
+    case "image": {
+      // No inline images in MarkdownV2 text; render as a link to the image.
+      const alt = escapeMarkdownV2(node.alt ?? "");
+      const url = escapeLinkUrl(node.url);
+      return `[${alt}](${url})`;
+    }
+
+    case "html":
+      // Telegram MarkdownV2 parser rejects raw HTML; escape so it renders literally.
+      return escapeMarkdownV2(node.value);
+
+    case "linkReference":
+    case "imageReference":
+      // Reference-style links/images lose their reference resolution here.
+      // Render the visible label as escaped text so nothing is dropped silently.
+      if ("children" in node && node.children.length > 0) {
+        return node.children.map(renderMarkdownV2).join("");
+      }
+      return escapeMarkdownV2(node.label ?? node.identifier);
+
+    case "definition":
+      // Reference-link definitions have no visible output.
+      return "";
+
+    case "footnoteDefinition":
+      // Hidden — footnote bodies aren't rendered inline in chat.
+      return "";
+
+    case "footnoteReference":
+      // No footnotes UI in Telegram; surface the label so it's not dropped.
+      return escapeMarkdownV2(`[^${node.label ?? node.identifier}]`);
+
+    case "yaml":
+      // Frontmatter isn't visible in chat messages.
+      return "";
+
+    case "table":
+    case "tableRow":
+    case "tableCell":
+      // `fromAst` walks the AST and rewrites Table nodes to Code blocks before
+      // calling this renderer. A table arriving here means that preprocessing
+      // was skipped — a contract violation, not a rendering decision.
+      throw new Error(
+        `Telegram MarkdownV2 renderer received a ${node.type} node; fromAst should have preprocessed it into a code block.`
+      );
+
+    default: {
+      // Exhaustiveness guard: a new mdast node type fails the build here.
+      throw new Error(`Unhandled case: ${node satisfies never}`);
+    }
+  }
+}
+
 export class TelegramFormatConverter extends BaseFormatConverter { fromAst(ast: Root): string { // Check for table nodes and replace them with code blocks, @@ -32,7 +340,7 @@ export class TelegramFormatConverter extends BaseFormatConverter { } return node; }); - return stringifyMarkdown(transformed).trim(); + return renderMarkdownV2(transformed).trim(); } toAst(text: string): Root { diff --git a/packages/chat/src/index.ts b/packages/chat/src/index.ts index bc2931cb..720983ea 100644 --- a/packages/chat/src/index.ts +++ b/packages/chat/src/index.ts @@ -213,6 +213,7 @@ export type { Link, List, ListItem, + Nodes, Paragraph, Root, Strong, diff --git a/packages/chat/src/markdown.ts b/packages/chat/src/markdown.ts index bce1ef0c..6201a575 100644 --- a/packages/chat/src/markdown.ts +++ b/packages/chat/src/markdown.ts @@ -43,6 +43,7 @@ export type { Link, List, ListItem, + Nodes, Paragraph, Root, Strong, diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index dcf29975..c64b85e4 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -248,6 +248,28 @@ importers: specifier: ^5.7.2 version: 5.9.3 + examples/telegram-chat: + dependencies: + '@chat-adapter/state-memory': + specifier: workspace:* + version: link:../../packages/state-memory + '@chat-adapter/telegram': + specifier: workspace:* + version: link:../../packages/adapter-telegram + chat: + specifier: workspace:* + version: link:../../packages/chat + devDependencies: + '@types/node': + specifier: ^22.10.2 + version: 22.19.17 + tsx: + specifier: ^4.19.2 + version: 4.21.0 + typescript: + specifier: ^5.7.2 + version: 5.9.3 + packages/adapter-discord: dependencies: '@chat-adapter/shared': @@ -2864,6 +2886,9 @@ packages: '@types/node@12.20.55': resolution: {integrity:
sha512-J8xLz7q2OFulZ2cyGTLE1TbbZcjpno7FaN6zdJNrgAdrJ+DZzh/uFR6YrTb4C+nXakvud8Q4+rbhoIWlYQbUFQ==} + '@types/node@22.19.17': + resolution: {integrity: sha512-wGdMcf+vPYM6jikpS/qhg6WiqSV/OhG+jeeHT/KlVqxYfD40iYJf9/AE1uQxVWFvU7MipKRkRv8NSHiCGgPr8Q==} + '@types/node@25.3.2': resolution: {integrity: sha512-RpV6r/ij22zRRdyBPcxDeKAzH43phWVKEjL2iksqo1Vz3CuBUrgmPpPhALKiRfU7OMCmeeO9vECBMsV0hMTG8Q==} @@ -5703,6 +5728,9 @@ packages: oxlint: optional: true + undici-types@6.21.0: + resolution: {integrity: sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==} + undici-types@7.18.2: resolution: {integrity: sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w==} @@ -8197,6 +8225,10 @@ snapshots: '@types/node@12.20.55': {} + '@types/node@22.19.17': + dependencies: + undici-types: 6.21.0 + '@types/node@25.3.2': dependencies: undici-types: 7.18.2 @@ -11618,6 +11650,8 @@ snapshots: jsonc-parser: 3.3.1 nypm: 0.6.5 + undici-types@6.21.0: {} + undici-types@7.18.2: {} undici@6.21.3: {}