From 5dc7648a208c3532dfbc0903f2294a4e7756252f Mon Sep 17 00:00:00 2001 From: aturret Date: Wed, 18 Feb 2026 01:41:36 -0600 Subject: [PATCH 1/8] feat: keep old app folder --- app/config.py | 2 +- app/models/classes.py | 19 +- app/models/metadata_item.py | 135 +--- app/models/telegraph_item.py | 65 +- app/models/url_metadata.py | 57 +- app/routers/feed_push.py | 53 -- app/services/inoreader/telegram_process.py | 23 +- app/services/telegram_bot/__init__.py | 696 +------------------- app/services/telegram_bot/handlers.py | 359 ++++++++++ app/services/telegram_bot/message_sender.py | 345 ++++++++++ app/utils/config.py | 62 +- app/utils/image.py | 55 +- app/utils/logger.py | 20 +- app/utils/network.py | 215 +----- app/utils/parse.py | 240 +------ 15 files changed, 810 insertions(+), 1536 deletions(-) delete mode 100644 app/routers/feed_push.py create mode 100644 app/services/telegram_bot/handlers.py create mode 100644 app/services/telegram_bot/message_sender.py diff --git a/app/config.py b/app/config.py index 50fd18d..ca0db5d 100644 --- a/app/config.py +++ b/app/config.py @@ -6,7 +6,7 @@ import gettext import secrets -from app.utils.parse import get_env_bool +from fastfetchbot_shared.utils.parse import get_env_bool env = os.environ current_directory = os.path.dirname(os.path.abspath(__file__)) diff --git a/app/models/classes.py b/app/models/classes.py index 2fab80a..e89bcf7 100644 --- a/app/models/classes.py +++ b/app/models/classes.py @@ -1,17 +1,2 @@ -from io import BytesIO - - -class NamedBytesIO(BytesIO): - @property - def name(self): - return self._name - - def __init__(self, content=None, name=None): - super().__init__(content) - self._name = name - if content is not None: - self.size = self.getbuffer().nbytes - - @name.setter - def name(self, value): - self._name = value +# Re-export from shared package +from fastfetchbot_shared.models.classes import NamedBytesIO # noqa: F401 diff --git a/app/models/metadata_item.py b/app/models/metadata_item.py index 
6b5820d..5bce9fa 100644 --- a/app/models/metadata_item.py +++ b/app/models/metadata_item.py @@ -1,123 +1,12 @@ -from dataclasses import dataclass -from enum import Enum, unique -from typing import Any, List, TypeVar, Callable, Type, cast, Union, Optional - -from pydantic import BaseModel - -""" -MetadataItem is a dataclass that represents a single item for our services. It would be saved in the database. -The MetadataItem is used to send to the telegram bot. Users can use the metadata to define their own message template. -If the program doesn't find the attribute in the dict_data, it will use the default value in case of KeyError. -""" - -T = TypeVar("T") - - -def from_str(x: Any) -> str: - if x is None: - return "" - assert isinstance(x, str) - return x - - -def from_list(f: Callable[[Any], T], x: Any) -> List[T]: - assert isinstance(x, list) - return [f(y) for y in x] - - -def to_class(c: Type[T], x: Any) -> dict: - assert isinstance(x, c) - return cast(Any, x).to_dict() - - -@unique -class MessageType(str, Enum): - SHORT = "short" - LONG = "long" - - -@dataclass -class MediaFile: - media_type: str - url: str - original_url: Optional[str] = None - caption: Optional[str] = None - - @staticmethod - def from_dict(obj: Any) -> "MediaFile": - assert isinstance(obj, dict) - media_type = from_str(obj.get("media_type")) - url = from_str(obj.get("url")) - caption = from_str(obj.get("caption")) - return MediaFile(media_type, url, caption) - - def to_dict(self) -> dict: - result: dict = {} - result["media_type"] = from_str(self.media_type) - result["url"] = from_str(self.url) - result["caption"] = self.caption - return result - - -@dataclass -class MetadataItem: - url: str - telegraph_url: Optional[str] - content: Optional[str] - text: Optional[str] - media_files: List[MediaFile] - author: str - title: str - author_url: Optional[str] - category: str - message_type: Optional[MessageType] - - @staticmethod - def from_dict(obj: Any) -> "MetadataItem": - assert isinstance(obj, 
dict) - url = from_str(obj.get("url")) - telegraph_url = from_str(obj.get("telegraph_url")) - content = from_str(obj.get("content")) - text = from_str(obj.get("text")) - media_files = from_list(MediaFile.from_dict, obj.get("media_files")) - author = from_str(obj.get("author")) - title = from_str(obj.get("title")) - author_url = from_str(obj.get("author_url")) - category = from_str(obj.get("category")) - message_type = MessageType(obj.get("message_type")) - return MetadataItem( - url, - telegraph_url, - content, - text, - media_files, - author, - title, - author_url, - category, - message_type, - ) - - def to_dict(self) -> dict: - result: dict = { - "url": from_str(self.url), - "telegraph_url": "", "content": from_str(self.content), - "text": from_str(self.text), - "media_files": from_list( - lambda x: to_class(MediaFile, x), self.media_files - ), - "author": from_str(self.author), - "title": from_str(self.title), - "author_url": from_str(self.author_url), - "category": from_str(self.category), - "message_type": self.message_type.value - } - return result - - -def metadata_item_from_dict(s: Any) -> MetadataItem: - return MetadataItem.from_dict(s) - - -def metadata_item_to_dict(x: MetadataItem) -> Any: - return to_class(MetadataItem, x) +# Re-export from shared package +from fastfetchbot_shared.models.metadata_item import * # noqa: F401,F403 +from fastfetchbot_shared.models.metadata_item import ( # noqa: F401 + MetadataItem, + MediaFile, + MessageType, + from_str, + from_list, + to_class, + metadata_item_from_dict, + metadata_item_to_dict, +) diff --git a/app/models/telegraph_item.py b/app/models/telegraph_item.py index 04d5b77..2b4b2f0 100644 --- a/app/models/telegraph_item.py +++ b/app/models/telegraph_item.py @@ -1,58 +1,7 @@ -from dataclasses import dataclass -from typing import Any, TypeVar, Type, cast - -""" -The TelegraphItem is a class for generating a Telegraph page. 
-If the program doesn't find the attribute in the dict_data, it will use the default value in case of KeyError. -""" - -T = TypeVar("T") - - -def from_str(x: Any) -> str: - assert isinstance(x, str) - return x - - -def to_class(c: Type[T], x: Any) -> dict: - assert isinstance(x, c) - return cast(Any, x).to_dict() - - -@dataclass -class TelegraphItem: - title: str - url: str - author: str - author_url: str - category: str - content: str - - @staticmethod - def from_dict(obj: Any) -> 'TelegraphItem': - assert isinstance(obj, dict) - title = from_str(obj.get("title")) - url = from_str(obj.get("url")) - author = from_str(obj.get("author")) - author_url = from_str(obj.get("author_url")) - category = from_str(obj.get("category")) - content = from_str(obj.get("content")) - return TelegraphItem(title, url, author, author_url, category, content) - - def to_dict(self) -> dict: - result: dict = {} - result["title"] = from_str(self.title) - result["url"] = from_str(self.url) - result["author"] = from_str(self.author) - result["author_url"] = from_str(self.author_url) - result["category"] = from_str(self.category) - result["content"] = from_str(self.content) - return result - - -def telegraph_item_from_dict(s: Any) -> TelegraphItem: - return TelegraphItem.from_dict(s) - - -def telegraph_item_to_dict(x: TelegraphItem) -> Any: - return to_class(TelegraphItem, x) +# Re-export from shared package +from fastfetchbot_shared.models.telegraph_item import * # noqa: F401,F403 +from fastfetchbot_shared.models.telegraph_item import ( # noqa: F401 + TelegraphItem, + telegraph_item_from_dict, + telegraph_item_to_dict, +) diff --git a/app/models/url_metadata.py b/app/models/url_metadata.py index a581045..020d120 100644 --- a/app/models/url_metadata.py +++ b/app/models/url_metadata.py @@ -1,50 +1,7 @@ -import re -from dataclasses import dataclass -from typing import Any, TypeVar, Type, cast - -T = TypeVar("T") - - -def from_str(x: Any) -> str: - assert isinstance(x, str) - return x - - -def 
to_class(c: Type[T], x: Any) -> dict: - assert isinstance(x, c) - return cast(Any, x).to_dict() - - -@dataclass -class UrlMetadata: - url: str - source: str - content_type: str - - def __init__(self, url: str, source: str, content_type: str) -> None: - self.url = url - self.source = source - self.content_type = content_type - - @staticmethod - def from_dict(obj: Any) -> "UrlMetadata": - assert isinstance(obj, dict) - url = from_str(obj.get("url")) - source = from_str(obj.get("source")) - the_type = from_str(obj.get("type")) - return UrlMetadata(url, source, the_type) - - def to_dict(self) -> dict: - result: dict = {} - result["url"] = from_str(self.url) - result["source"] = from_str(self.source) - result["content_type"] = from_str(self.content_type) - return result - - -def url_metadata_from_dict(s: Any) -> UrlMetadata: - return UrlMetadata.from_dict(s) - - -def url_metadata_to_dict(x: UrlMetadata) -> Any: - return to_class(UrlMetadata, x) +# Re-export from shared package +from fastfetchbot_shared.models.url_metadata import * # noqa: F401,F403 +from fastfetchbot_shared.models.url_metadata import ( # noqa: F401 + UrlMetadata, + url_metadata_from_dict, + url_metadata_to_dict, +) diff --git a/app/routers/feed_push.py b/app/routers/feed_push.py deleted file mode 100644 index 997fdb1..0000000 --- a/app/routers/feed_push.py +++ /dev/null @@ -1,53 +0,0 @@ -# TODO: this script is now unused, will be removed in the future - -from fastapi import APIRouter -from fastapi.requests import Request - -from app.config import TELEGRAM_CHANNEL_ID -from app.services.telegram_bot import send_item_message -from app.services.scrapers.common import InfoExtractService -from fastapi import Security -from app.auth import verify_api_key -from app.utils.logger import logger -from app.utils.parse import get_url_metadata - -router = APIRouter(prefix="/feedPush") - - -async def get_feed_item(url: str, channel_id: str, **kwargs): - try: - channel_id = int(channel_id) if channel_id.startswith("-") 
else channel_id - url_metadata = await get_url_metadata(url) - item = InfoExtractService(url_metadata, **kwargs) - metadata_item = await item.get_item() - if channel_id not in TELEGRAM_CHANNEL_ID: - logger.error(f"channel_id {channel_id} not found") - return - await send_item_message(metadata_item, chat_id=channel_id) - except Exception as e: - logger.error(f"Error while getting item: {e}") - - -@router.post("/", dependencies=[Security(verify_api_key)]) -async def push_feed_item( - request: Request, -): - try: - data = await request.json() - params = request.query_params - url = ( - data.get("url") - or data.get("aurl") - or params.get("url") - or params.get("aurl") - ) - if not url: - return f"Error: url is required" - channel_id = data.get("channelId") or params.get("channelId") - if not channel_id: - return f"Error: channelId is required" - kwargs = data.get("kwargs", {}) - await get_feed_item(url, channel_id, **kwargs) - return "ok" - except Exception as e: - return f"Error: {e}" diff --git a/app/services/inoreader/telegram_process.py b/app/services/inoreader/telegram_process.py index a1102f5..975e894 100644 --- a/app/services/inoreader/telegram_process.py +++ b/app/services/inoreader/telegram_process.py @@ -1,22 +1,34 @@ -from typing import Union, Optional, Dict +from typing import Union, Optional, Dict, Callable, Awaitable from app.config import TELEGRAM_CHANNEL_ID from app.models.url_metadata import UrlMetadata from app.services.inoreader import Inoreader from app.services.scrapers.common import InfoExtractService -from app.services.telegram_bot import send_item_message from app.utils.logger import logger from app.utils.parse import get_url_metadata, get_bool default_telegram_channel_id = TELEGRAM_CHANNEL_ID[0] if TELEGRAM_CHANNEL_ID else None +# Type alias for the message callback +MessageCallback = Callable[[dict, Union[int, str]], Awaitable[None]] + + +async def _default_message_callback(metadata_item: dict, chat_id: Union[int, str]) -> None: + """Default 
callback that sends via Telegram bot. Used when no callback is provided.""" + from app.services.telegram_bot import send_item_message + await send_item_message(metadata_item, chat_id=chat_id) + async def process_inoreader_data( data: list, use_inoreader_content: bool, telegram_channel_id: Union[int, str] = default_telegram_channel_id, stream_id: str = None, + message_callback: MessageCallback = None, ): + if message_callback is None: + message_callback = _default_message_callback + for item in data: url_type_item = await get_url_metadata(item["aurl"]) url_type_dict = url_type_item.to_dict() @@ -46,7 +58,7 @@ async def process_inoreader_data( store_document=True, ) message_metadata_item = await metadata_item.get_item() - await send_item_message(message_metadata_item, chat_id=telegram_channel_id) + await message_callback(message_metadata_item, telegram_channel_id) if stream_id: await Inoreader.mark_all_as_read( stream_id=stream_id, timestamp=item["timestamp"] - 1 @@ -57,7 +69,7 @@ async def get_inoreader_item_async( data: Optional[Dict] = None, trigger: bool = False, params: Optional[Dict] = None, - # filters: Optional[Dict] = None, + message_callback: MessageCallback = None, ) -> None: stream_id = None use_inoreader_content = True @@ -83,7 +95,8 @@ async def get_inoreader_item_async( if type(data) is dict: data = [data] await process_inoreader_data( - data, use_inoreader_content, telegram_channel_id, stream_id + data, use_inoreader_content, telegram_channel_id, stream_id, + message_callback=message_callback, ) if stream_id: await Inoreader.mark_all_as_read(stream_id=stream_id) diff --git a/app/services/telegram_bot/__init__.py b/app/services/telegram_bot/__init__.py index 5de80d8..0fe696e 100755 --- a/app/services/telegram_bot/__init__.py +++ b/app/services/telegram_bot/__init__.py @@ -1,36 +1,15 @@ # TODO: Implement Telegram Service # example: https://docs.python-telegram-bot.org/en/stable/examples.customwebhookbot.html -import asyncio -import html -import json 
-import os import mimetypes -import aiofiles -import traceback -from io import BytesIO -from urllib.parse import urlparse -from urllib.request import url2pathname -from typing import Union mimetypes.init() from telegram import ( Update, MessageEntity, - InlineKeyboardButton, - InlineKeyboardMarkup, - Message, - InputMediaPhoto, - InputMediaVideo, - InputMediaDocument, - InputMediaAnimation, - InputMediaAudio, ) -from telegram.constants import ParseMode from telegram.ext import ( Application, - CallbackContext, - ContextTypes, MessageHandler, CallbackQueryHandler, filters, @@ -38,49 +17,31 @@ AIORateLimiter, ) -from app.database import save_instances -from app.models.metadata_item import MessageType -from app.models.telegram_chat import TelegramMessage, TelegramUser, TelegramChat -from app.services.scrapers.common import InfoExtractService -from app.utils.parse import get_url_metadata, telegram_message_html_trim -from app.utils.network import download_file_by_metadata_item -from app.utils.image import Image, image_compressing, check_image_type -from app.utils.config import SOCIAL_MEDIA_WEBSITE_PATTERNS, VIDEO_WEBSITE_PATTERNS from app.utils.logger import logger from app.config import ( TELEGRAM_BOT_TOKEN, TELEGRAM_WEBHOOK_URL, TELEGRAM_BOT_SECRET_TOKEN, - TELEGRAM_CHANNEL_ID, - TELEGRAM_CHANNEL_ADMIN_LIST, - TELEBOT_DEBUG_CHANNEL, TELEBOT_API_SERVER, TELEBOT_API_SERVER_FILE, TELEBOT_LOCAL_FILE_MODE, TELEBOT_CONNECT_TIMEOUT, TELEBOT_READ_TIMEOUT, TELEBOT_WRITE_TIMEOUT, - TELEGRAM_IMAGE_DIMENSION_LIMIT, - TELEGRAM_IMAGE_SIZE_LIMIT, - TELEGRAM_GROUP_MESSAGE_BAN_LIST, - TELEGRAM_BOT_MESSAGE_BAN_LIST, - FILE_EXPORTER_ON, - JINJA2_ENV, - OPENAI_API_KEY, - DATABASE_ON, - TEMPLATE_LANGUAGE, TELEBOT_MAX_RETRY, GENERAL_SCRAPING_ON, + TELEBOT_MAX_RETRY, ) -from app.services.telegram_bot.config import ( - HTTPS_URL_REGEX, - TELEGRAM_SINGLE_MESSAGE_MEDIA_LIMIT, - TELEGRAM_FILE_UPLOAD_LIMIT, - TELEGRAM_FILE_UPLOAD_LIMIT_LOCAL_API, - REFERER_REQUIRED, - TELEGRAM_TEXT_LIMIT, - 
TEMPLATE_TRANSLATION, + +# Re-export for external consumers +from app.services.telegram_bot.message_sender import send_item_message # noqa: F401 +from app.services.telegram_bot.handlers import ( # noqa: F401 + https_url_process, + https_url_auto_process, + all_messages_process, + buttons_process, + invalid_buttons, + error_process, + content_process_function, ) -from app.models.classes import NamedBytesIO -from app.models.url_metadata import UrlMetadata """ application and handlers initialization @@ -112,12 +73,6 @@ async def set_webhook() -> bool: else: logger.error("TELEGRAM_BOT_TOKEN is not set!") -environment = JINJA2_ENV -template = environment.get_template("social_media_message.jinja2") -template_text = TEMPLATE_TRANSLATION.get( - TEMPLATE_LANGUAGE, TEMPLATE_TRANSLATION["zh_CN"] -) - async def startup() -> None: await application.initialize() @@ -187,630 +142,3 @@ async def process_telegram_update( update = Update.de_json(data=data, bot=application.bot) application.bot.insert_callback_data(update) await application.update_queue.put(update) - - -async def https_url_process(update: Update, context: CallbackContext) -> None: - message = update.message - welcome_message = await message.reply_text( - text="Processing...", - ) - url_dict: dict = message.parse_entities(types=["url"]) - await welcome_message.delete() - for i, url in enumerate(url_dict.values()): - process_message = await message.reply_text( - text=f"Processing the {i + 1}th url...", - ) - url_metadata = await get_url_metadata(url, ban_list=TELEGRAM_BOT_MESSAGE_BAN_LIST) - if url_metadata.source == "banned": - await process_message.edit_text( - text=f"For the {i + 1} th url, the url is banned." - ) - return - if url_metadata.source == "unknown": - if GENERAL_SCRAPING_ON: - await process_message.edit_text( - text=f"Uncategorized url found. General webpage parser is on, Processing..." 
- ) - metadata_item = await content_process_function(url_metadata=url_metadata) - await send_item_message( - metadata_item, chat_id=message.chat_id - ) - await process_message.edit_text( - text=f"For the {i + 1} th url, no supported url found." - ) - return - else: - await process_message.edit_text( - text=f"{url_metadata.source} url found. Processing..." - ) - # create the inline keyboard - special_function_keyboard = [] - basic_function_keyboard = [] - if TELEGRAM_CHANNEL_ID and ( - TELEGRAM_CHANNEL_ADMIN_LIST - and str(message.from_user.id) in TELEGRAM_CHANNEL_ADMIN_LIST - ): - special_function_keyboard.append( - InlineKeyboardButton( - "Send to Channel", - callback_data={ - "type": "channel", - "metadata": url_metadata, - "extra_args": {"store_document": True}, - }, - ), - ) - # video content url buttons - if url_metadata.content_type == "video": - basic_function_keyboard.extend( - [ - InlineKeyboardButton( - "Get Info", - callback_data={ - "type": "video", - "metadata": url_metadata, - "extra_args": {"download": False}, - }, - ), - InlineKeyboardButton( - "Download", - callback_data={ - "type": "video", - "metadata": url_metadata, - }, - ), - ] - ) - if FILE_EXPORTER_ON: - special_function_keyboard.extend( - [ - InlineKeyboardButton( - "Audio Only", - callback_data={ - "type": "video", - "metadata": url_metadata, - "extra_args": { - "audio_only": True, - }, - }, - ), - InlineKeyboardButton( - "Download HD", - callback_data={ - "type": "video", - "metadata": url_metadata, - "extra_args": {"hd": True}, - }, - ), - ] - ) - if OPENAI_API_KEY: - special_function_keyboard.append( - InlineKeyboardButton( - "Transcribe Text", - callback_data={ - "type": "video", - "metadata": url_metadata, - "extra_args": { - "audio_only": True, - "transcribe": True, - "store_document": True, - }, - }, - ), - ) - elif url_metadata.content_type == "social_media": - basic_function_keyboard.extend( - [ - InlineKeyboardButton( - "Send to Me", - callback_data={"type": "private", 
"metadata": url_metadata}, - ), - InlineKeyboardButton( - "Force Send in Chat", - callback_data={"type": "force", "metadata": url_metadata}, - ), - ] - ) - if FILE_EXPORTER_ON: - special_function_keyboard.append( - InlineKeyboardButton( - "Send with PDF", - callback_data={ - "type": "pdf", - "metadata": url_metadata, - "extra_args": {"store_document": True}, - }, - ), - ) - basic_function_keyboard.append( - InlineKeyboardButton( - "Cancel", - callback_data={"type": "cancel"}, - ), - ) - keyboard = [ - special_function_keyboard, - basic_function_keyboard, - ] - reply_markup = InlineKeyboardMarkup(keyboard) - await process_message.reply_text( - f"For the {i + 1}th url: {url}, please choose the function you want to use:", - reply_markup=reply_markup, - ) - await process_message.delete() - - -async def https_url_auto_process(update: Update, context: CallbackContext) -> None: - message = update.message - url_dict = message.parse_entities(types=["url"]) - for i, url in enumerate(url_dict.values()): - url_metadata = await get_url_metadata( - url, ban_list=TELEGRAM_GROUP_MESSAGE_BAN_LIST - ) - if url_metadata.source == "unknown" and GENERAL_SCRAPING_ON: - metadata_item = await content_process_function(url_metadata=url_metadata) - await send_item_message( - metadata_item, chat_id=message.chat_id, message=message - ) - elif url_metadata.source == "unknown" or url_metadata.source == "banned": - logger.debug(f"for the {i + 1}th url {url}, no supported url found.") - return - if url_metadata.to_dict().get("source") in SOCIAL_MEDIA_WEBSITE_PATTERNS.keys(): - metadata_item = await content_process_function(url_metadata=url_metadata) - await send_item_message( - metadata_item, chat_id=message.chat_id, message=message - ) - if url_metadata.to_dict().get("source") in VIDEO_WEBSITE_PATTERNS.keys(): - metadata_item = await content_process_function(url_metadata=url_metadata) - await send_item_message( - metadata_item, chat_id=message.chat_id, message=message - ) - - -async def 
all_messages_process(update: Update, context: CallbackContext) -> None: - message = update.message - logger.debug(message) - if message and DATABASE_ON: - telegram_chat = TelegramChat.construct(**message.chat.to_dict()) - telegram_user = TelegramUser.construct(**message.from_user.to_dict()) - telegram_message = TelegramMessage( - datetime=message.date, - chat=telegram_chat, - user=telegram_user, - text=message.text or "", - ) - await save_instances(telegram_message) - - -async def buttons_process(update: Update, context: CallbackContext) -> None: - query = update.callback_query - data = query.data - chat_id = None - if data["type"] == "cancel": - await query.answer("Canceled") - else: - if data["type"] == "private" or data["type"] == "force": - await query.answer("Sending to you...") - if data["type"] == "channel": - if data.get("channel_id") or len(TELEGRAM_CHANNEL_ID) == 1: - channel_chat = await application.bot.get_chat( - chat_id=data.get("channel_id") - if data.get("channel_id") - else TELEGRAM_CHANNEL_ID[0] - ) - await query.answer("Sending to channel...") - if channel_chat.type == "channel": - chat_id = channel_chat.id - else: - await query.message.reply_text( - text="Sorry, the provided channel id does not exist or is not a channel." 
- ) - chat_id = query.message.chat_id - elif len(TELEGRAM_CHANNEL_ID) > 1: - choose_channel_keyboard = await _create_choose_channel_keyboard( - data=data - ) - await query.message.reply_text( - text="Please choose the channel you want to send:", - reply_markup=InlineKeyboardMarkup(choose_channel_keyboard), - ) - await query.message.delete() - context.drop_callback_data(query) - return - else: - chat_id = query.message.chat_id - if data["type"] == "video": - await query.answer("Video processing...") - replying_message = await query.message.reply_text( - text=f"Item processing...", - ) - extra_args = data["extra_args"] if "extra_args" in data else {} - metadata_item = await content_process_function( - url_metadata=data["metadata"], **extra_args - ) - await replying_message.edit_text( - text=f"Item processed. Sending to the target...", - ) - if data["type"] == "force": - metadata_item["message_type"] = MessageType.SHORT - await send_item_message(metadata_item, chat_id=chat_id) - if data["type"] == "channel": - await query.message.reply_text( - text=f"Item sent to the channel.", - ) - await replying_message.delete() - await query.message.delete() - context.drop_callback_data(query) - - -async def _create_choose_channel_keyboard(data: dict) -> list: - choose_channel_keyboard = [] - for i, channel_id in enumerate(TELEGRAM_CHANNEL_ID): - channel_chat = await application.bot.get_chat(chat_id=channel_id) - choose_channel_keyboard.append( - [ - InlineKeyboardButton( - channel_chat.title, - callback_data={ - "type": "channel", - "metadata": data["metadata"], - "extra_args": data["extra_args"], - "channel_id": channel_id, - }, - ) - ] - ) - choose_channel_keyboard.append( - [ - InlineKeyboardButton( - "Cancel", - callback_data={"type": "cancel"}, - ) - ] - ) - return choose_channel_keyboard - - -async def invalid_buttons(update: Update, context: CallbackContext) -> None: - await update.callback_query.answer("Invalid button!") - await update.effective_message.edit_text( - 
"Sorry, Error Occurred, I could not process this button click 😕." - ) - - -async def content_process_function(url_metadata: UrlMetadata, **kwargs) -> dict: - item = InfoExtractService(url_metadata, **kwargs) - metadata_item = await item.get_item() - return metadata_item - - -async def send_item_message( - data: dict, chat_id: Union[int, str] = None, message: Message = None -) -> None: - """ - :param data: (dict) metadata of the item - :param chat_id: (int) any chat id for sending - :param message: (Message) any message to reply - :return: - """ - logger.debug(f"send_item_message: {data}, {chat_id}, {message}") - if not chat_id and not message: - raise ValueError("must provide chat_id or message") - if ( - not chat_id - ) and message: # this function supports direct reply to a message even if the chat_id is None - chat_id = message.chat.id - discussion_chat_id = chat_id - the_chat = await application.bot.get_chat(chat_id=chat_id) - logger.debug(f"the chat of sending message: {the_chat}") - if the_chat.type == "channel" and the_chat.linked_chat_id: - discussion_chat_id = the_chat.linked_chat_id - try: - caption_text = message_formatting(data) - if len(data["media_files"]) > 0: - # if the message type is short and there are some media files, send media group - reply_to_message_id = None - media_message_group, file_message_group = await media_files_packaging( - media_files=data["media_files"], data=data - ) - if ( - len(media_message_group) > 0 - ): # if there are some media groups to send, send it - for i, media_group in enumerate(media_message_group): - caption_text = ( - caption_text - if i == 0 - else f"the {i + 1}th part of the media item:" - ) - logger.debug(f"media group: {media_group}") - logger.debug( - f"caption text: {caption_text},length={len(caption_text)}" - ) - sent_media_files_message = await application.bot.send_media_group( - chat_id=chat_id, - media=media_group, - parse_mode=ParseMode.HTML, - caption=caption_text, - 
write_timeout=TELEBOT_WRITE_TIMEOUT, - reply_to_message_id=message.message_id if message else None, - ) - if sent_media_files_message is tuple: - reply_to_message_id = sent_media_files_message[0].message_id - elif sent_media_files_message is Message: - reply_to_message_id = sent_media_files_message.message_id - logger.debug(f"sent media files message: {sent_media_files_message}") - else: - sent_message = await application.bot.send_message( - chat_id=chat_id, - text=caption_text, - parse_mode=ParseMode.HTML, - reply_to_message_id=message.message_id if message else None, - disable_web_page_preview=True - if data["message_type"] == MessageType.SHORT - else False, - disable_notification=True, - ) - if discussion_chat_id != chat_id: - await asyncio.sleep( - 3 - ) # wait for several seconds to avoid missing the target message - # if the chat is a channel, get the latest pinned message from the channel and reply to it - group_chat = await application.bot.get_chat(chat_id=discussion_chat_id) - logger.debug(f"the group chat: {group_chat}") - pinned_message = group_chat.pinned_message - logger.debug(f"the pinned message: {pinned_message}") - if len(media_message_group) > 0: - if ( - pinned_message.forward_origin.message_id - == sent_media_files_message[-1].message_id - ): - reply_to_message_id = ( - group_chat.pinned_message.id - - len(sent_media_files_message) - + 1 - ) - else: - reply_to_message_id = group_chat.pinned_message.id + 1 - elif pinned_message.forward_origin.message_id == sent_message.message_id: - reply_to_message_id = group_chat.pinned_message.id - else: - reply_to_message_id = group_chat.pinned_message.id + 1 - if ( - len(file_message_group) > 0 - ): # to send files, the files messages should be replied to the message sent before - logger.debug(f"reply_to_message_id: {reply_to_message_id}") - for file_group in file_message_group: - logger.debug(f"file group: {file_group}") - await application.bot.send_media_group( - chat_id=discussion_chat_id, - 
media=file_group, - reply_to_message_id=reply_to_message_id, - parse_mode=ParseMode.HTML, - disable_notification=True, - ) - else: - await application.bot.send_message( - chat_id=chat_id, - text=caption_text, - parse_mode=ParseMode.HTML, - reply_to_message_id=message.message_id if message else None, - disable_web_page_preview=True - if data["message_type"] == "short" - else False, - disable_notification=True, - ) - # except BadRequest as e: - # logger.error(e) - # except RetryAfter as e: - # logger.error(e) - # except TimedOut as e: - # logger.error(e) - # await application.bot.send_message( - # chat_id=discussion_chat_id, - # text="Timed out while sending the item to the target 😕", - # reply_to_message_id=message.message_id if message else None, - # ) - except Exception as e: - logger.error(e) - traceback.print_exc() - # await application.bot.send_message( - # chat_id=discussion_chat_id, - # text="Error occurred while sending the item to the target 😕", - # reply_to_message_id=message.message_id if message else None, - # ) - await send_debug_channel(traceback.format_exc()) - - -async def error_process(update: object, context: ContextTypes.DEFAULT_TYPE) -> None: - logger.error("Exception while handling an update:", exc_info=context.error) - tb_list = traceback.format_exception( - None, context.error, context.error.__traceback__ - ) - tb_string = "".join(tb_list) - update_str = update.to_dict() if isinstance(update, Update) else str(update) - message = ( - f"An exception was raised while handling an update\n" - f"
update = {html.escape(json.dumps(update_str, indent=2, ensure_ascii=False))}"
-        "
\n\n" - f"
context.chat_data = {html.escape(str(context.chat_data))}
\n\n" - f"
context.user_data = {html.escape(str(context.user_data))}
\n\n" - f"
{html.escape(tb_string)}
" - ) - debug_chat_id = update.message.chat_id - if TELEBOT_DEBUG_CHANNEL is not None: - debug_chat_id = TELEBOT_DEBUG_CHANNEL - await context.bot.send_message( - chat_id=debug_chat_id, text=message, parse_mode=ParseMode.HTML - ) - - -async def send_debug_channel(message: str) -> None: - if TELEBOT_DEBUG_CHANNEL is not None: - await application.bot.send_message( - chat_id=TELEBOT_DEBUG_CHANNEL, text=message, parse_mode=ParseMode.HTML - ) - - -def message_formatting(data: dict) -> str: - """ - Format the message to be sent to the user. - :param data: - :return: text (str) the formatted text for telegram bot api sending message. - """ - if data["message_type"] == "short": - data["text"] = telegram_message_html_trim(data["text"]) - message_template = template - text = message_template.render(data=data, template_text=template_text) - logger.debug(f"message text: \n{text}") - return text - - -async def media_files_packaging(media_files: list, data: dict) -> tuple: - """ - Download the media files from data["media_files"] and package them into a list of media group or file group for - sending them by send_media_group method or send_document method. - :param data: (dict) metadata of the item - :param media_files: (list) a list of media files, - :param caption_text: (str) the caption text - :return: (tuple) a tuple of media group and file group - media_message_group: (list) a list of media items, the type of each item is InputMediaPhoto or InputMediaVideo - file_group: (list) a list of file items, the type of each item is InputFile - TODO: It's not a good practice for this function. This method will still download all the media files even when - media files are too large and it can be memory consuming even if we use a database to store the media files. - The function should be optimized to resolve the media files one group by one group and send each group - immediately after it is resolved. - This processing method should be optimized in the future. 
- """ - media_counter, file_counter = 0, 0 - media_message_group, media_group, file_message_group, file_group = [], [], [], [] - for ( - media_item - ) in media_files: # To traverse all media items in the media files list - # check if we need to create a new media group - if media_counter == TELEGRAM_SINGLE_MESSAGE_MEDIA_LIMIT: - # the limitation of media item for a single telegram media group message is 10 - media_message_group.append(media_group) - media_group = [] - media_counter = 0 - if file_counter == TELEGRAM_SINGLE_MESSAGE_MEDIA_LIMIT: - # the limitation of media item for a single telegram media group message is 10 - file_message_group.append(file_group) - file_group = [] - file_counter = 0 - if not ( - media_item["media_type"] in ["image", "gif", "video"] - and data["message_type"] == "long" - ): - # check the url validity - url_parser = urlparse(media_item["url"]) - if url_parser.scheme in [ - "http", - "https", - ]: # if the url is a http url, download the file - file_format = "mp4" if media_item["media_type"] == "video" else None - io_object = await download_file_by_metadata_item( - media_item["url"], data=data, file_format=file_format - ) - filename = io_object.name - file_size = io_object.size - else: # if the url is a local file path, just add it to the media group - try: - file_path = url2pathname(media_item["url"]) - async with aiofiles.open(file_path, mode="rb") as f: - filename = os.path.basename(file_path) - content = await f.read() - io_object = NamedBytesIO(content=content, name=filename) - file_size = io_object.size - except Exception as e: # the url is not a valid file path - logger.error(e) - continue - # check the file size - if ( - not TELEBOT_API_SERVER - ): # the official telegram bot api server only supports 50MB file - if file_size > TELEGRAM_FILE_UPLOAD_LIMIT: - # if the size is over 50MB, skip this file - continue - else: - if file_size > TELEGRAM_FILE_UPLOAD_LIMIT_LOCAL_API: - # for local api sever, if the size is over 2GB, skip 
this file - continue - # check media files' type and process them by their type - if media_item["media_type"] == "image": - image_url = media_item["url"] - ext = await check_image_type(io_object) - # jpg to jpeg, ignore case - if ext.lower() == "jpg": - ext = "JPEG" - io_object.seek(0) - image = Image.open(io_object, formats=[ext]) - img_width, img_height = image.size - ratio = float(max(img_height, img_width)) / float( - min(img_height, img_width) - ) - # don't try to resize image if the ratio is too large - if ( - ratio < 5 - or max(img_height, img_width) < TELEGRAM_IMAGE_DIMENSION_LIMIT - ): - image = image_compressing(image, TELEGRAM_IMAGE_DIMENSION_LIMIT) - with BytesIO() as buffer: - # mime_type file format - image.save(buffer, format=ext) - buffer.seek(0) - resized_ratio = max(image.height, image.width) / min( - image.height, image.width - ) - logger.debug( - f"resized image size: {buffer.getbuffer().nbytes}, ratio: {resized_ratio}, width: {image.width}, height: {image.height}" - ) - media_group.append(InputMediaPhoto(buffer, filename=filename)) - # the image is not able to get json serialized - logger.debug( - f"image size: {file_size}, ratio: {ratio}, width: {img_width}, height: {img_height}" - ) - if ( - file_size > TELEGRAM_IMAGE_SIZE_LIMIT - or img_width > TELEGRAM_IMAGE_DIMENSION_LIMIT - or img_height > TELEGRAM_IMAGE_DIMENSION_LIMIT - ) and data["category"] not in ["xiaohongshu"]: - io_object = await download_file_by_metadata_item( - url=image_url, data=data - ) - if not io_object.name.endswith(".gif"): - if not io_object.name.endswith(ext.lower()): - io_object.name = io_object.name + "." + ext.lower() - # TODO: it is not a good way to judge whether it is a gif... 
- file_group.append( - InputMediaDocument(io_object, parse_mode=ParseMode.HTML) - ) - file_counter += 1 - elif media_item["media_type"] == "gif": - io_object = await download_file_by_metadata_item( - url=media_item["url"], - data=data, - file_name="gif_image-" + str(media_counter) + ".gif", - ) - io_object.name = io_object.name + ".gif" - media_group.append(InputMediaAnimation(io_object)) - elif media_item["media_type"] == "video": - media_group.append(InputMediaVideo(io_object, supports_streaming=True)) - # TODO: not have any services to store audio files for now, just a placeholder - elif media_item["media_type"] == "audio": - media_group.append(InputMediaAudio(io_object)) - elif media_item["media_type"] == "document": - file_group.append( - InputMediaDocument(io_object, parse_mode=ParseMode.HTML) - ) - file_counter += 1 - media_counter += 1 - logger.info( - f"get the {media_counter}th media item,type: {media_item['media_type']}, url: {media_item['url']}" - ) - # check if the media group is empty, if it is, return None - if len(media_group) > 0: # append the last media group - media_message_group.append(media_group) - if len(file_group) > 0: - file_message_group.append(file_group) - return media_message_group, file_message_group diff --git a/app/services/telegram_bot/handlers.py b/app/services/telegram_bot/handlers.py new file mode 100644 index 0000000..73bd5b9 --- /dev/null +++ b/app/services/telegram_bot/handlers.py @@ -0,0 +1,359 @@ +import html +import json +import traceback + +from telegram import ( + Update, + MessageEntity, + InlineKeyboardButton, + InlineKeyboardMarkup, +) +from telegram.constants import ParseMode +from telegram.ext import ( + CallbackContext, + ContextTypes, +) + +from app.database import save_instances +from app.models.metadata_item import MessageType +from app.models.telegram_chat import TelegramMessage, TelegramUser, TelegramChat +from app.models.url_metadata import UrlMetadata +from app.services.scrapers.common import 
InfoExtractService +from app.services.telegram_bot.message_sender import send_item_message +from app.utils.parse import get_url_metadata +from app.utils.config import SOCIAL_MEDIA_WEBSITE_PATTERNS, VIDEO_WEBSITE_PATTERNS +from app.utils.logger import logger +from app.config import ( + TELEGRAM_CHANNEL_ID, + TELEGRAM_CHANNEL_ADMIN_LIST, + TELEBOT_DEBUG_CHANNEL, + TELEGRAM_GROUP_MESSAGE_BAN_LIST, + TELEGRAM_BOT_MESSAGE_BAN_LIST, + FILE_EXPORTER_ON, + OPENAI_API_KEY, + DATABASE_ON, + GENERAL_SCRAPING_ON, +) + + +async def content_process_function(url_metadata: UrlMetadata, **kwargs) -> dict: + item = InfoExtractService(url_metadata, **kwargs) + metadata_item = await item.get_item() + return metadata_item + + +async def https_url_process(update: Update, context: CallbackContext) -> None: + message = update.message + welcome_message = await message.reply_text( + text="Processing...", + ) + url_dict: dict = message.parse_entities(types=["url"]) + await welcome_message.delete() + for i, url in enumerate(url_dict.values()): + process_message = await message.reply_text( + text=f"Processing the {i + 1}th url...", + ) + url_metadata = await get_url_metadata(url, ban_list=TELEGRAM_BOT_MESSAGE_BAN_LIST) + if url_metadata.source == "banned": + await process_message.edit_text( + text=f"For the {i + 1} th url, the url is banned." + ) + return + if url_metadata.source == "unknown": + if GENERAL_SCRAPING_ON: + await process_message.edit_text( + text=f"Uncategorized url found. General webpage parser is on, Processing..." + ) + metadata_item = await content_process_function(url_metadata=url_metadata) + await send_item_message( + metadata_item, chat_id=message.chat_id + ) + await process_message.edit_text( + text=f"For the {i + 1} th url, no supported url found." + ) + return + else: + await process_message.edit_text( + text=f"{url_metadata.source} url found. Processing..." 
+ ) + # create the inline keyboard + special_function_keyboard = [] + basic_function_keyboard = [] + if TELEGRAM_CHANNEL_ID and ( + TELEGRAM_CHANNEL_ADMIN_LIST + and str(message.from_user.id) in TELEGRAM_CHANNEL_ADMIN_LIST + ): + special_function_keyboard.append( + InlineKeyboardButton( + "Send to Channel", + callback_data={ + "type": "channel", + "metadata": url_metadata, + "extra_args": {"store_document": True}, + }, + ), + ) + # video content url buttons + if url_metadata.content_type == "video": + basic_function_keyboard.extend( + [ + InlineKeyboardButton( + "Get Info", + callback_data={ + "type": "video", + "metadata": url_metadata, + "extra_args": {"download": False}, + }, + ), + InlineKeyboardButton( + "Download", + callback_data={ + "type": "video", + "metadata": url_metadata, + }, + ), + ] + ) + if FILE_EXPORTER_ON: + special_function_keyboard.extend( + [ + InlineKeyboardButton( + "Audio Only", + callback_data={ + "type": "video", + "metadata": url_metadata, + "extra_args": { + "audio_only": True, + }, + }, + ), + InlineKeyboardButton( + "Download HD", + callback_data={ + "type": "video", + "metadata": url_metadata, + "extra_args": {"hd": True}, + }, + ), + ] + ) + if OPENAI_API_KEY: + special_function_keyboard.append( + InlineKeyboardButton( + "Transcribe Text", + callback_data={ + "type": "video", + "metadata": url_metadata, + "extra_args": { + "audio_only": True, + "transcribe": True, + "store_document": True, + }, + }, + ), + ) + elif url_metadata.content_type == "social_media": + basic_function_keyboard.extend( + [ + InlineKeyboardButton( + "Send to Me", + callback_data={"type": "private", "metadata": url_metadata}, + ), + InlineKeyboardButton( + "Force Send in Chat", + callback_data={"type": "force", "metadata": url_metadata}, + ), + ] + ) + if FILE_EXPORTER_ON: + special_function_keyboard.append( + InlineKeyboardButton( + "Send with PDF", + callback_data={ + "type": "pdf", + "metadata": url_metadata, + "extra_args": {"store_document": True}, + }, + 
), + ) + basic_function_keyboard.append( + InlineKeyboardButton( + "Cancel", + callback_data={"type": "cancel"}, + ), + ) + keyboard = [ + special_function_keyboard, + basic_function_keyboard, + ] + reply_markup = InlineKeyboardMarkup(keyboard) + await process_message.reply_text( + f"For the {i + 1}th url: {url}, please choose the function you want to use:", + reply_markup=reply_markup, + ) + await process_message.delete() + + +async def https_url_auto_process(update: Update, context: CallbackContext) -> None: + message = update.message + url_dict = message.parse_entities(types=["url"]) + for i, url in enumerate(url_dict.values()): + url_metadata = await get_url_metadata( + url, ban_list=TELEGRAM_GROUP_MESSAGE_BAN_LIST + ) + if url_metadata.source == "unknown" and GENERAL_SCRAPING_ON: + metadata_item = await content_process_function(url_metadata=url_metadata) + await send_item_message( + metadata_item, chat_id=message.chat_id, message=message + ) + elif url_metadata.source == "unknown" or url_metadata.source == "banned": + logger.debug(f"for the {i + 1}th url {url}, no supported url found.") + return + if url_metadata.to_dict().get("source") in SOCIAL_MEDIA_WEBSITE_PATTERNS.keys(): + metadata_item = await content_process_function(url_metadata=url_metadata) + await send_item_message( + metadata_item, chat_id=message.chat_id, message=message + ) + if url_metadata.to_dict().get("source") in VIDEO_WEBSITE_PATTERNS.keys(): + metadata_item = await content_process_function(url_metadata=url_metadata) + await send_item_message( + metadata_item, chat_id=message.chat_id, message=message + ) + + +async def all_messages_process(update: Update, context: CallbackContext) -> None: + message = update.message + logger.debug(message) + if message and DATABASE_ON: + telegram_chat = TelegramChat.construct(**message.chat.to_dict()) + telegram_user = TelegramUser.construct(**message.from_user.to_dict()) + telegram_message = TelegramMessage( + datetime=message.date, + chat=telegram_chat, 
+ user=telegram_user, + text=message.text or "", + ) + await save_instances(telegram_message) + + +async def buttons_process(update: Update, context: CallbackContext) -> None: + from app.services.telegram_bot import application + + query = update.callback_query + data = query.data + chat_id = None + if data["type"] == "cancel": + await query.answer("Canceled") + else: + if data["type"] == "private" or data["type"] == "force": + await query.answer("Sending to you...") + if data["type"] == "channel": + if data.get("channel_id") or len(TELEGRAM_CHANNEL_ID) == 1: + channel_chat = await application.bot.get_chat( + chat_id=data.get("channel_id") + if data.get("channel_id") + else TELEGRAM_CHANNEL_ID[0] + ) + await query.answer("Sending to channel...") + if channel_chat.type == "channel": + chat_id = channel_chat.id + else: + await query.message.reply_text( + text="Sorry, the provided channel id does not exist or is not a channel." + ) + chat_id = query.message.chat_id + elif len(TELEGRAM_CHANNEL_ID) > 1: + choose_channel_keyboard = await _create_choose_channel_keyboard( + data=data + ) + await query.message.reply_text( + text="Please choose the channel you want to send:", + reply_markup=InlineKeyboardMarkup(choose_channel_keyboard), + ) + await query.message.delete() + context.drop_callback_data(query) + return + else: + chat_id = query.message.chat_id + if data["type"] == "video": + await query.answer("Video processing...") + replying_message = await query.message.reply_text( + text=f"Item processing...", + ) + extra_args = data["extra_args"] if "extra_args" in data else {} + metadata_item = await content_process_function( + url_metadata=data["metadata"], **extra_args + ) + await replying_message.edit_text( + text=f"Item processed. 
Sending to the target...", + ) + if data["type"] == "force": + metadata_item["message_type"] = MessageType.SHORT + await send_item_message(metadata_item, chat_id=chat_id) + if data["type"] == "channel": + await query.message.reply_text( + text=f"Item sent to the channel.", + ) + await replying_message.delete() + await query.message.delete() + context.drop_callback_data(query) + + +async def _create_choose_channel_keyboard(data: dict) -> list: + from app.services.telegram_bot import application + + choose_channel_keyboard = [] + for i, channel_id in enumerate(TELEGRAM_CHANNEL_ID): + channel_chat = await application.bot.get_chat(chat_id=channel_id) + choose_channel_keyboard.append( + [ + InlineKeyboardButton( + channel_chat.title, + callback_data={ + "type": "channel", + "metadata": data["metadata"], + "extra_args": data["extra_args"], + "channel_id": channel_id, + }, + ) + ] + ) + choose_channel_keyboard.append( + [ + InlineKeyboardButton( + "Cancel", + callback_data={"type": "cancel"}, + ) + ] + ) + return choose_channel_keyboard + + +async def invalid_buttons(update: Update, context: CallbackContext) -> None: + await update.callback_query.answer("Invalid button!") + await update.effective_message.edit_text( + "Sorry, Error Occurred, I could not process this button click 😕." + ) + + +async def error_process(update: object, context: ContextTypes.DEFAULT_TYPE) -> None: + logger.error("Exception while handling an update:", exc_info=context.error) + tb_list = traceback.format_exception( + None, context.error, context.error.__traceback__ + ) + tb_string = "".join(tb_list) + update_str = update.to_dict() if isinstance(update, Update) else str(update) + message = ( + f"An exception was raised while handling an update\n" + f"
update = {html.escape(json.dumps(update_str, indent=2, ensure_ascii=False))}"
+        "
\n\n" + f"
context.chat_data = {html.escape(str(context.chat_data))}
\n\n" + f"
context.user_data = {html.escape(str(context.user_data))}
\n\n" + f"
{html.escape(tb_string)}
" + ) + debug_chat_id = update.message.chat_id + if TELEBOT_DEBUG_CHANNEL is not None: + debug_chat_id = TELEBOT_DEBUG_CHANNEL + await context.bot.send_message( + chat_id=debug_chat_id, text=message, parse_mode=ParseMode.HTML + ) diff --git a/app/services/telegram_bot/message_sender.py b/app/services/telegram_bot/message_sender.py new file mode 100644 index 0000000..8b60f8f --- /dev/null +++ b/app/services/telegram_bot/message_sender.py @@ -0,0 +1,345 @@ +import asyncio +import os +import traceback +from io import BytesIO +from urllib.parse import urlparse +from urllib.request import url2pathname +from typing import Union + +import aiofiles +from telegram import ( + Message, + InputMediaPhoto, + InputMediaVideo, + InputMediaDocument, + InputMediaAnimation, + InputMediaAudio, +) +from telegram.constants import ParseMode + +from app.models.metadata_item import MessageType +from app.models.classes import NamedBytesIO +from app.utils.parse import telegram_message_html_trim +from app.utils.network import download_file_by_metadata_item +from app.utils.image import Image, image_compressing, check_image_type +from app.utils.logger import logger +from app.config import ( + TELEBOT_API_SERVER, + TELEBOT_WRITE_TIMEOUT, + TELEGRAM_IMAGE_DIMENSION_LIMIT, + TELEGRAM_IMAGE_SIZE_LIMIT, + JINJA2_ENV, + TEMPLATE_LANGUAGE, +) +from app.services.telegram_bot.config import ( + TELEGRAM_SINGLE_MESSAGE_MEDIA_LIMIT, + TELEGRAM_FILE_UPLOAD_LIMIT, + TELEGRAM_FILE_UPLOAD_LIMIT_LOCAL_API, + TEMPLATE_TRANSLATION, +) + +environment = JINJA2_ENV +template = environment.get_template("social_media_message.jinja2") +template_text = TEMPLATE_TRANSLATION.get( + TEMPLATE_LANGUAGE, TEMPLATE_TRANSLATION["zh_CN"] +) + + +def _get_application(): + """Lazy import to avoid circular dependency.""" + from app.services.telegram_bot import application + return application + + +async def send_item_message( + data: dict, chat_id: Union[int, str] = None, message: Message = None +) -> None: + """ + :param data: 
(dict) metadata of the item + :param chat_id: (int) any chat id for sending + :param message: (Message) any message to reply + :return: + """ + application = _get_application() + logger.debug(f"send_item_message: {data}, {chat_id}, {message}") + if not chat_id and not message: + raise ValueError("must provide chat_id or message") + if ( + not chat_id + ) and message: # this function supports direct reply to a message even if the chat_id is None + chat_id = message.chat.id + discussion_chat_id = chat_id + the_chat = await application.bot.get_chat(chat_id=chat_id) + logger.debug(f"the chat of sending message: {the_chat}") + if the_chat.type == "channel" and the_chat.linked_chat_id: + discussion_chat_id = the_chat.linked_chat_id + try: + caption_text = message_formatting(data) + if len(data["media_files"]) > 0: + # if the message type is short and there are some media files, send media group + reply_to_message_id = None + media_message_group, file_message_group = await media_files_packaging( + media_files=data["media_files"], data=data + ) + if ( + len(media_message_group) > 0 + ): # if there are some media groups to send, send it + for i, media_group in enumerate(media_message_group): + caption_text = ( + caption_text + if i == 0 + else f"the {i + 1}th part of the media item:" + ) + logger.debug(f"media group: {media_group}") + logger.debug( + f"caption text: {caption_text},length={len(caption_text)}" + ) + sent_media_files_message = await application.bot.send_media_group( + chat_id=chat_id, + media=media_group, + parse_mode=ParseMode.HTML, + caption=caption_text, + write_timeout=TELEBOT_WRITE_TIMEOUT, + reply_to_message_id=message.message_id if message else None, + ) + if sent_media_files_message is tuple: + reply_to_message_id = sent_media_files_message[0].message_id + elif sent_media_files_message is Message: + reply_to_message_id = sent_media_files_message.message_id + logger.debug(f"sent media files message: {sent_media_files_message}") + else: + sent_message = 
await application.bot.send_message( + chat_id=chat_id, + text=caption_text, + parse_mode=ParseMode.HTML, + reply_to_message_id=message.message_id if message else None, + disable_web_page_preview=True + if data["message_type"] == MessageType.SHORT + else False, + disable_notification=True, + ) + if discussion_chat_id != chat_id: + await asyncio.sleep( + 3 + ) # wait for several seconds to avoid missing the target message + # if the chat is a channel, get the latest pinned message from the channel and reply to it + group_chat = await application.bot.get_chat(chat_id=discussion_chat_id) + logger.debug(f"the group chat: {group_chat}") + pinned_message = group_chat.pinned_message + logger.debug(f"the pinned message: {pinned_message}") + if len(media_message_group) > 0: + if ( + pinned_message.forward_origin.message_id + == sent_media_files_message[-1].message_id + ): + reply_to_message_id = ( + group_chat.pinned_message.id + - len(sent_media_files_message) + + 1 + ) + else: + reply_to_message_id = group_chat.pinned_message.id + 1 + elif pinned_message.forward_origin.message_id == sent_message.message_id: + reply_to_message_id = group_chat.pinned_message.id + else: + reply_to_message_id = group_chat.pinned_message.id + 1 + if ( + len(file_message_group) > 0 + ): # to send files, the files messages should be replied to the message sent before + logger.debug(f"reply_to_message_id: {reply_to_message_id}") + for file_group in file_message_group: + logger.debug(f"file group: {file_group}") + await application.bot.send_media_group( + chat_id=discussion_chat_id, + media=file_group, + reply_to_message_id=reply_to_message_id, + parse_mode=ParseMode.HTML, + disable_notification=True, + ) + else: + await application.bot.send_message( + chat_id=chat_id, + text=caption_text, + parse_mode=ParseMode.HTML, + reply_to_message_id=message.message_id if message else None, + disable_web_page_preview=True + if data["message_type"] == "short" + else False, + disable_notification=True, + ) + 
except Exception as e: + logger.error(e) + traceback.print_exc() + await send_debug_channel(traceback.format_exc()) + + +async def send_debug_channel(message: str) -> None: + from app.config import TELEBOT_DEBUG_CHANNEL + application = _get_application() + if TELEBOT_DEBUG_CHANNEL is not None: + await application.bot.send_message( + chat_id=TELEBOT_DEBUG_CHANNEL, text=message, parse_mode=ParseMode.HTML + ) + + +def message_formatting(data: dict) -> str: + """ + Format the message to be sent to the user. + :param data: + :return: text (str) the formatted text for telegram bot api sending message. + """ + if data["message_type"] == "short": + data["text"] = telegram_message_html_trim(data["text"]) + message_template = template + text = message_template.render(data=data, template_text=template_text) + logger.debug(f"message text: \n{text}") + return text + + +async def media_files_packaging(media_files: list, data: dict) -> tuple: + """ + Download the media files from data["media_files"] and package them into a list of media group or file group for + sending them by send_media_group method or send_document method. + :param data: (dict) metadata of the item + :param media_files: (list) a list of media files, + :return: (tuple) a tuple of media group and file group + media_message_group: (list) a list of media items, the type of each item is InputMediaPhoto or InputMediaVideo + file_group: (list) a list of file items, the type of each item is InputFile + TODO: It's not a good practice for this function. This method will still download all the media files even when + media files are too large and it can be memory consuming even if we use a database to store the media files. + The function should be optimized to resolve the media files one group by one group and send each group + immediately after it is resolved. + This processing method should be optimized in the future. 
+ """ + media_counter, file_counter = 0, 0 + media_message_group, media_group, file_message_group, file_group = [], [], [], [] + for ( + media_item + ) in media_files: # To traverse all media items in the media files list + # check if we need to create a new media group + if media_counter == TELEGRAM_SINGLE_MESSAGE_MEDIA_LIMIT: + # the limitation of media item for a single telegram media group message is 10 + media_message_group.append(media_group) + media_group = [] + media_counter = 0 + if file_counter == TELEGRAM_SINGLE_MESSAGE_MEDIA_LIMIT: + # the limitation of media item for a single telegram media group message is 10 + file_message_group.append(file_group) + file_group = [] + file_counter = 0 + if not ( + media_item["media_type"] in ["image", "gif", "video"] + and data["message_type"] == "long" + ): + # check the url validity + url_parser = urlparse(media_item["url"]) + if url_parser.scheme in [ + "http", + "https", + ]: # if the url is a http url, download the file + file_format = "mp4" if media_item["media_type"] == "video" else None + io_object = await download_file_by_metadata_item( + media_item["url"], data=data, file_format=file_format + ) + filename = io_object.name + file_size = io_object.size + else: # if the url is a local file path, just add it to the media group + try: + file_path = url2pathname(media_item["url"]) + async with aiofiles.open(file_path, mode="rb") as f: + filename = os.path.basename(file_path) + content = await f.read() + io_object = NamedBytesIO(content=content, name=filename) + file_size = io_object.size + except Exception as e: # the url is not a valid file path + logger.error(e) + continue + # check the file size + if ( + not TELEBOT_API_SERVER + ): # the official telegram bot api server only supports 50MB file + if file_size > TELEGRAM_FILE_UPLOAD_LIMIT: + # if the size is over 50MB, skip this file + continue + else: + if file_size > TELEGRAM_FILE_UPLOAD_LIMIT_LOCAL_API: + # for local api sever, if the size is over 2GB, skip 
this file + continue + # check media files' type and process them by their type + if media_item["media_type"] == "image": + image_url = media_item["url"] + ext = await check_image_type(io_object) + # jpg to jpeg, ignore case + if ext.lower() == "jpg": + ext = "JPEG" + io_object.seek(0) + image = Image.open(io_object, formats=[ext]) + img_width, img_height = image.size + ratio = float(max(img_height, img_width)) / float( + min(img_height, img_width) + ) + # don't try to resize image if the ratio is too large + if ( + ratio < 5 + or max(img_height, img_width) < TELEGRAM_IMAGE_DIMENSION_LIMIT + ): + image = image_compressing(image, TELEGRAM_IMAGE_DIMENSION_LIMIT) + with BytesIO() as buffer: + # mime_type file format + image.save(buffer, format=ext) + buffer.seek(0) + resized_ratio = max(image.height, image.width) / min( + image.height, image.width + ) + logger.debug( + f"resized image size: {buffer.getbuffer().nbytes}, ratio: {resized_ratio}, width: {image.width}, height: {image.height}" + ) + media_group.append(InputMediaPhoto(buffer, filename=filename)) + # the image is not able to get json serialized + logger.debug( + f"image size: {file_size}, ratio: {ratio}, width: {img_width}, height: {img_height}" + ) + if ( + file_size > TELEGRAM_IMAGE_SIZE_LIMIT + or img_width > TELEGRAM_IMAGE_DIMENSION_LIMIT + or img_height > TELEGRAM_IMAGE_DIMENSION_LIMIT + ) and data["category"] not in ["xiaohongshu"]: + io_object = await download_file_by_metadata_item( + url=image_url, data=data + ) + if not io_object.name.endswith(".gif"): + if not io_object.name.endswith(ext.lower()): + io_object.name = io_object.name + "." + ext.lower() + # TODO: it is not a good way to judge whether it is a gif... 
+ file_group.append( + InputMediaDocument(io_object, parse_mode=ParseMode.HTML) + ) + file_counter += 1 + elif media_item["media_type"] == "gif": + io_object = await download_file_by_metadata_item( + url=media_item["url"], + data=data, + file_name="gif_image-" + str(media_counter) + ".gif", + ) + io_object.name = io_object.name + ".gif" + media_group.append(InputMediaAnimation(io_object)) + elif media_item["media_type"] == "video": + media_group.append(InputMediaVideo(io_object, supports_streaming=True)) + # TODO: not have any services to store audio files for now, just a placeholder + elif media_item["media_type"] == "audio": + media_group.append(InputMediaAudio(io_object)) + elif media_item["media_type"] == "document": + file_group.append( + InputMediaDocument(io_object, parse_mode=ParseMode.HTML) + ) + file_counter += 1 + media_counter += 1 + logger.info( + f"get the {media_counter}th media item,type: {media_item['media_type']}, url: {media_item['url']}" + ) + # check if the media group is empty, if it is, return None + if len(media_group) > 0: # append the last media group + media_message_group.append(media_group) + if len(file_group) > 0: + file_message_group.append(file_group) + return media_message_group, file_message_group diff --git a/app/utils/config.py b/app/utils/config.py index 2c9b6a3..ad3d691 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -1,55 +1,7 @@ -""" -patterns for check url type -""" -SOCIAL_MEDIA_WEBSITE_PATTERNS = { - "weibo": [ - r"(m\.)?weibo.cn\/(status\/)?[0-9a-zA-Z]+", - r"(www\.)?weibo\.com\/(status\/)?[0-9a-zA-Z]+", - ], - "twitter": [r"(twitter|x)\.com\/[^\/]+\/status\/[0-9]+"], - "instagram": [r"(www\.)?instagram\.com(\/share)?\/(p|reel)\/[A-Za-z0-9_-]+"], - "zhihu": [ - r"(www\.)?zhihu\.com\/question\/[0-9]+\/answer\/[0-9]+", - r"(www\.)?zhihu\.com\/answer\/[0-9]+", - r"(www\.)?zhihu\.com\/aria\/answer\/[0-9]+", - r"(www\.)?zhihu\.com\/aria\/question\/[0-9]+\/answer\/[0-9]+", - r"(www\.)?zhihu\.com\/pin\/[0-9]+", - 
r"zhuanlan\.zhihu\.com\/p\/[0-9]+", - ], - "douban": [ - r"(game|music|movie|book)?\.douban\.com\/review\/[0-9]+", - r"((www|m)\.)?douban\.com\/note\/[0-9]+", - r"((www|m)\.)?douban\.com\/people\/[^\/]+\/status\/[0-9]+", - r"((www|m)\.)?douban\.com\/group\/topic\/[0-9]+", - r"((www|m)\.)?douban\.com\/(game|music|movie|book)\/review\/[0-9]+", - ], - "wechat": [r"mp\.weixin\.qq\.com\/s", r"mp\.weixin\.qq\.com\/mp\/appmsg\/show"], - "threads": [r"(www\.)?threads\.net\/@[a-zA-Z0-9]+\/post"], - "xiaohongshu": [ - r"(www\.)?xiaohongshu\.com\/(discovery\/item|explore)\/[0-9a-zA-Z_-]+", - r"(www\.)?xhslink\.com\/[0-9a-zA-Z_-]+", - ], - "reddit": [ - r"(www\.)?reddit\.com\/r\/[a-zA-Z0-9_-]+\/comments\/[a-zA-Z0-9_-]+", - r"(www\.)?reddit\.com\/r\/[a-zA-Z0-9_-]+\/s\/[a-zA-Z0-9_-]+", - ], - "bluesky": [ - r"(www\.)?bsky\.app\/profile/[a-zA-Z0-9\.]+\/post\/[a-zA-Z0-9\-_]+", - ] -} -VIDEO_WEBSITE_PATTERNS = { - "youtube": [ - r"((m|www)\.)youtube\.com\/watch", - r"youtu\.be\/[A-Za-z0-9_-]+", - r"youtube\.com\/shorts\/[A-Za-z0-9_-]+", - ], - "bilibili": [ - r"((www\.)?bilibili\.com\/video\/[A-Za-z0-9]+)", - r"b23\.tv\/[A-Za-z0-9]+", - ], -} -BANNED_PATTERNS = [ - r"chatgpt\.com\/share\/[A-Za-z0-9]+", - r"gemini\/share\/[A-Za-z0-9]+", - r"t\.me\/[A-Za-z0-9]+" -] \ No newline at end of file +# Re-export from shared package +from fastfetchbot_shared.utils.config import * # noqa: F401,F403 +from fastfetchbot_shared.utils.config import ( # noqa: F401 + SOCIAL_MEDIA_WEBSITE_PATTERNS, + VIDEO_WEBSITE_PATTERNS, + BANNED_PATTERNS, +) diff --git a/app/utils/image.py b/app/utils/image.py index 1e0a4af..500afcd 100644 --- a/app/utils/image.py +++ b/app/utils/image.py @@ -1,46 +1,9 @@ -import mimetypes -from io import BytesIO - -import magic -from PIL import Image -import asyncio -from app.config import env - -DEFAULT_IMAGE_LIMITATION = env.get("DEFAULT_IMAGE_LIMITATION", 1600) - - -def get_image_dimension(image_file: str): - image = Image.open(image_file) - return image.size - - -def 
image_compressing(image: Image, limitation: int = DEFAULT_IMAGE_LIMITATION): - new_image = image - if image.size[0] > limitation or image.size[1] > limitation: - if image.size[0] > image.size[1]: - new_image = image.resize( - (limitation, int(image.size[1] * limitation / image.size[0])), - Image.Resampling.LANCZOS, - ) - else: - new_image = image.resize( - (int(image.size[0] * limitation / image.size[1]), limitation), - Image.Resampling.LANCZOS, - ) - return new_image - - -async def check_image_type(io_object: BytesIO): - loop = asyncio.get_running_loop() - mime_type = await loop.run_in_executor( - None, lambda: magic.from_buffer(io_object.read(), mime=True) - ) - if mime_type == "image/webp": - ext = "webp" - else: - ext = mimetypes.guess_extension(mime_type, strict=True) - if ext is None: - ext = "webp" - else: - ext = ext[1:] - return ext +# Re-export from shared package +from fastfetchbot_shared.utils.image import * # noqa: F401,F403 +from fastfetchbot_shared.utils.image import ( # noqa: F401 + Image, + get_image_dimension, + image_compressing, + check_image_type, + DEFAULT_IMAGE_LIMITATION, +) diff --git a/app/utils/logger.py b/app/utils/logger.py index b7e2d46..1d4ac5f 100644 --- a/app/utils/logger.py +++ b/app/utils/logger.py @@ -1,18 +1,2 @@ -import logging -import os - -from loguru import logger - -from app.config import LOG_LEVEL, LOG_FILE_PATH - -log_path = os.path.join(LOG_FILE_PATH, "app.log") - -logger.add( - log_path, - level=LOG_LEVEL, - rotation="1 week", - retention="10 days", - compression="zip", -) -logger.debug(f"Logger initialized with level: {LOG_LEVEL}") -logger.debug(f"Logger initialized with log file path: {log_path}") +# Re-export from shared package +from fastfetchbot_shared.utils.logger import logger # noqa: F401 diff --git a/app/utils/network.py b/app/utils/network.py index ff7ec1f..bb422db 100644 --- a/app/utils/network.py +++ b/app/utils/network.py @@ -1,202 +1,13 @@ -import asyncio -import datetime -import json -import os -import 
uuid -from typing import Optional - -import aiofiles -import httpx -import traceback - -from lxml import etree -from fake_useragent import UserAgent -from playwright.async_api import async_playwright - -from app.models.classes import NamedBytesIO -from app.config import HTTP_REQUEST_TIMEOUT, DOWNLOAD_DIR -from app.utils.image import check_image_type -from app.utils.logger import logger - - -async def get_response( - url: str, headers: dict = None, params: dict = None, client: httpx.AsyncClient = None -) -> httpx.Response: - if headers is None: - headers = HEADERS - if client: - resp = await client.get( - url, headers=headers, params=params, timeout=HTTP_REQUEST_TIMEOUT - ) - return resp - else: - async with httpx.AsyncClient() as client: - resp = await client.get( - url, headers=headers, params=params, timeout=HTTP_REQUEST_TIMEOUT - ) - return resp - - -async def get_response_json(url: str, headers=None, client: httpx.AsyncClient = None) -> dict: - try: - response = await get_response(url, headers=headers, client=client) - json_result = response.json() - except Exception as e: - print(e, traceback.format_exc()) - json_result = None - return json_result - - - -async def get_selector( - url: str, headers: dict, follow_redirects: bool = True -) -> etree.HTML: - """ - A function to get etree.HTML selector according to url and headers. - We can use this function to do additional parsing works. 
- :param follow_redirects: - :param url: the target webpage url - :param headers: the headers of the request - :return: the selector of the target webpage parsed by etree.HTML - """ - async with httpx.AsyncClient() as client: - resp = await client.get( - url, - headers=headers, - follow_redirects=follow_redirects, - timeout=HTTP_REQUEST_TIMEOUT, - ) - if ( - resp.history - ): # if there is a redirect, the request will have a response chain - print("Request was redirected") - for h in resp.history: - print(h.status_code, h.url) - # if code is 302, do not follow the redirect - if h.status_code == 302: - selector = await get_selector( - h.url, headers=headers, follow_redirects=False - ) - return selector - print("Final destination:", resp.status_code, resp.url) - selector = etree.HTML(resp.text) # the content of the final destination - return selector - - -async def get_redirect_url(url: str, headers: Optional[dict] = None) -> str: - if not headers: - headers = HEADERS - async with httpx.AsyncClient() as client: - resp = await client.get(url, headers=headers, timeout=HTTP_REQUEST_TIMEOUT) - if resp.status_code == 302 or resp.status_code == 301: - return resp.headers["Location"] - else: - return url - - -async def get_content_async(url): - async with async_playwright() as p: - browser = await p.firefox.launch() - context = await browser.new_context(viewport={"width": 1920, "height": 1080}) - page = await context.new_page() - - async def scroll_to_end(page): - # Scrolls to the bottom of the page - await page.evaluate(""" - async () => { - const delay = ms => new Promise(resolve => setTimeout(resolve, ms)); - while (document.scrollingElement.scrollTop + window.innerHeight < document.scrollingElement.scrollHeight) { - document.scrollingElement.scrollTop += 100; // Adjust the scroll amount - await delay(100); // Adjust the delay time - } - } - """) - - async def wait_for_network_idle(): - async with page.expect_response("**/api/content") as response_info: - response = 
await response_info.value - if response.status == 200: - print("Content loaded") - - await page.goto(url) - await wait_for_network_idle() - await scroll_to_end(page) - content = await page.content() - await browser.close() - return content - - -async def download_file_by_metadata_item( - url: str, - data: dict, - file_name: str = None, - file_format: str = None, - headers: dict = None, -) -> NamedBytesIO: - """ - A customized function to download a file from url and return a NamedBytesIO object. - :param file_format: - :param data: - :param url: - :param file_name: - :param headers: - :return: - """ - try: - if headers is None: - headers = HEADERS - headers["User-Agent"] = get_random_user_agent() - headers["referer"] = data["url"] - if data["category"] in ["reddit"]: - headers["Accept"] = "image/avif,image/webp,*/*" - async with httpx.AsyncClient() as client: - response = await client.get( - url=url, headers=headers, timeout=HTTP_REQUEST_TIMEOUT - ) - # if redirect 302, get the final url - if response.status_code == 302 or response.status_code == 301: - url = response.headers["Location"] - file_data = response.content - if file_name is None: - file_format = file_format if file_format else url.split(".")[-1] - file_name = "media-" + str(uuid.uuid1())[:8] + "." + file_format - io_object = NamedBytesIO(file_data, name=file_name) - return io_object - except Exception as e: - await asyncio.sleep(2) - logger.error(f"Failed to download {url}, {e}") - - -async def download_file_to_local( - url: str, - file_path: str = None, - dir_path: str = DOWNLOAD_DIR, - file_name: str = "", - headers: dict = None, - referer: str = None, -) -> str: - io_object = await download_file_by_metadata_item(url=url, data={}, file_name=file_name, headers=headers) - ext = await check_image_type(io_object) - io_object.seek(0) - file_name = file_name + uuid.uuid4().hex + "." 
+ ext - logger.info(f"Downloading {file_name}") - if file_path is None and dir_path is not None: - file_path = os.path.join(dir_path, file_name) - async with aiofiles.open(file_path, "wb") as f: - await f.write(io_object.read()) - return file_path - - -def get_random_user_agent() -> str: - ua = UserAgent() - return ua.random - - -""" -default headers -""" - -HEADERS = { - "User-Agent": get_random_user_agent(), - "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", -} +# Re-export from shared package +from fastfetchbot_shared.utils.network import * # noqa: F401,F403 +from fastfetchbot_shared.utils.network import ( # noqa: F401 + get_response, + get_response_json, + get_selector, + get_redirect_url, + get_content_async, + download_file_by_metadata_item, + download_file_to_local, + get_random_user_agent, + HEADERS, +) diff --git a/app/utils/parse.py b/app/utils/parse.py index 53c55e4..8843e7e 100644 --- a/app/utils/parse.py +++ b/app/utils/parse.py @@ -1,224 +1,16 @@ -import datetime -import os -import re -import mimetypes -from typing import Optional -from urllib.parse import urlparse, unquote - -from bs4 import BeautifulSoup - -from app.models.url_metadata import UrlMetadata -from app.utils.config import SOCIAL_MEDIA_WEBSITE_PATTERNS, VIDEO_WEBSITE_PATTERNS, BANNED_PATTERNS - -TELEGRAM_TEXT_LIMIT = 900 - -mimetypes.init() - - -def get_html_text_length(html: str) -> int: - if html is None: - return 0 - soup = BeautifulSoup(html, "html.parser") - text = soup.get_text() - return len(text) - - -def format_telegram_short_text(soup: BeautifulSoup) -> BeautifulSoup: - decompose_list = ["br"] - unwrap_list = ["span", "div", "blockquote", "h2", "ol", "ul"] - new_line_list = ["p", "li"] - for decompose in decompose_list: - for item in soup.find_all(decompose): - item.decompose() - for unwrap in unwrap_list: - for item in soup.find_all(unwrap): - item.unwrap() - for ( - new_line - ) in ( - 
new_line_list - ): # add a new line after each

and

  • tag and then remove the tag(unwrapping) - for item in soup.find_all(new_line): - item.append(BeautifulSoup("
    ", "html.parser")) - item.unwrap() - return soup - - -def unix_timestamp_to_utc(timestamp: int) -> str | None: - if not timestamp: - return None - utc_time = datetime.datetime.utcfromtimestamp(timestamp) - beijing_time = utc_time + datetime.timedelta(hours=8) - return beijing_time.strftime("%Y-%m-%d %H:%M") - - -def second_to_time(second: int) -> str: - m, s = divmod(second, 60) - h, m = divmod(m, 60) - return "{:02d}:{:02d}:{:02d}".format(h, m, s) - - -def string_to_list(string: str, divider: str = ",") -> list: - if string is None: - return [] - return string.split(divider) - - -async def get_url_metadata(url: str, ban_list: Optional[list] = None) -> UrlMetadata: - if not ban_list: - ban_list = [] - url_parser = urlparse(url) - url_main = str(url_parser.hostname) + str(url_parser.path) - source, content_type = "unknown", "unknown" - # check if the url is a social media platform website - for website, patterns in SOCIAL_MEDIA_WEBSITE_PATTERNS.items(): - for pattern in patterns: - if re.search(pattern, url_main): - source = website - content_type = "social_media" - # check if the url is a video website - if source == "unknown": - for website, patterns in VIDEO_WEBSITE_PATTERNS.items(): - for pattern in patterns: - if re.search(pattern, url_main): - source = website - content_type = "video" - # clear the url query - if source not in ["youtube", "bilibili", "wechat"]: - url = url_parser.scheme + "://" + url_parser.netloc + url_parser.path - if source in ban_list: - source = "banned" - content_type = "banned" - else: - for item in BANNED_PATTERNS: - if re.search(item, url): - source = "banned" - content_type = "banned" - break - # TODO: check if the url is from Mastodon, according to the request cookie - return UrlMetadata(url=url, source=source, content_type=content_type) - - -def get_ext_from_url(url: str) -> str: - url_object = urlparse(url) - filename = unquote(url_object.path) - ext = os.path.splitext(filename)[1] - # check if ext in mimetypes.types_map - if 
ext in mimetypes.types_map: - return ext - else: - return None - - -def wrap_text_into_html(text: str, is_html: bool = False) -> str: - if is_html: - soup = BeautifulSoup(text, "html.parser") - for item in soup.find_all("br"): - item.replace_with("\n") - text = str(soup) - text_list = text.split("\n") - text_list = [f"

    {item}

    " for item in text_list if item.strip() != ""] - text = "".join(text_list) - return text - - -def telegram_message_html_trim(html_content: str, trim_length: int = TELEGRAM_TEXT_LIMIT) -> str: - from bs4 import Doctype - - soup = BeautifulSoup(html_content, "html.parser") - - # Remove DOCTYPE declarations - for item in soup.contents: - if isinstance(item, Doctype): - item.extract() - - # Decompose tags that should be removed entirely (with their content) - for tag_name in ["img", "script", "style", "head", "meta", "link", "noscript", "iframe", "svg", "form", "input", "button"]: - for tag in soup.find_all(tag_name): - tag.decompose() - - # Unwrap structural/layout tags — keep their text, discard the wrapper - for tag_name in ["div", "span", "section", "article", "nav", "header", "footer", - "main", "aside", "figure", "figcaption", "html", "body"]: - for tag in soup.find_all(tag_name): - tag.unwrap() - - # Convert headings to bold text with line break - for level in range(1, 7): - for tag in soup.find_all(f"h{level}"): - tag.name = "b" - - # Unwrap

    tags (keep text content) - for tag in soup.find_all("p"): - tag.unwrap() - - html_content = str(soup).strip() - - if len(html_content) <= trim_length: - return html_content - - # Initial trimming - trimmed_content = html_content[:trim_length] - - # Find the position of the last complete tag in the trimmed content - last_complete_pos = trimmed_content.rfind('<') - if last_complete_pos != -1: - trimmed_content = trimmed_content[:last_complete_pos] - - # Remove any incomplete tags by ensuring each tag is closed - cleaned_html = '' - open_tags = [] - - tag_pattern = re.compile(r'<(/?)([a-zA-Z0-9]+)([^>]*)>') - pos = 0 - - while pos < len(trimmed_content): - match = tag_pattern.search(trimmed_content, pos) - if not match: - break - - start, end = match.span() - cleaned_html += trimmed_content[pos:start] - - closing, tag_name, attributes = match.groups() - - if closing: - if open_tags and open_tags[-1] == tag_name: - open_tags.pop() - cleaned_html += match.group(0) - else: - if not attributes.endswith('/'): - open_tags.append(tag_name) - cleaned_html += match.group(0) - - pos = end - - cleaned_html += trimmed_content[pos:] - - # Ensure to close all open tags - for tag in reversed(open_tags): - cleaned_html += f'' - - return cleaned_html + ' ...' 
- - -def get_bool(value: Optional[str], default: bool = True) -> bool: - true_values = ("True", "true", "1", "yes", "on") - false_values = ("False", "false", "0", "no", "off") - - if value is None: - return default - value = value.lower() - - if value in true_values: - return True - elif value in false_values: - return False - else: - return default - - -def get_env_bool(env, var_name: Optional[str], default: bool = False): - """Retrieve environment variable as a boolean.""" - value = env.get(var_name, "").lower() - return get_bool(value, default) +# Re-export from shared package +from fastfetchbot_shared.utils.parse import * # noqa: F401,F403 +from fastfetchbot_shared.utils.parse import ( # noqa: F401 + get_html_text_length, + format_telegram_short_text, + unix_timestamp_to_utc, + second_to_time, + string_to_list, + get_url_metadata, + get_ext_from_url, + wrap_text_into_html, + telegram_message_html_trim, + get_bool, + get_env_bool, + TELEGRAM_TEXT_LIMIT, +) From e4ee85e4b081a71a5ca6f76eec05a7c38ef2b3f7 Mon Sep 17 00:00:00 2001 From: aturret Date: Wed, 18 Feb 2026 01:42:06 -0600 Subject: [PATCH 2/8] feat: refactor the codebase --- apps/api/Dockerfile | 88 ++ apps/api/pyproject.toml | 41 + apps/api/src/__init__.py | 0 apps/api/src/auth.py | 19 + apps/api/src/config.py | 154 ++++ apps/api/src/database.py | 37 + apps/api/src/main.py | 55 ++ apps/api/src/models/__init__.py | 0 apps/api/src/models/database_model.py | 41 + apps/api/src/routers/__init__.py | 0 apps/api/src/routers/inoreader.py | 38 + apps/api/src/routers/scraper.py | 37 + apps/api/src/routers/scraper_routers.py | 6 + apps/api/src/routers/wechat.py | 29 + apps/api/src/services/__init__.py | 0 apps/api/src/services/amazon/__init__.py | 0 apps/api/src/services/amazon/s3.py | 67 ++ apps/api/src/services/file_export/__init__.py | 0 .../file_export/audio_transcribe/__init__.py | 30 + .../file_export/document_export/__init__.py | 10 + .../file_export/document_export/pdf_export.py | 89 ++ 
.../file_export/video_download/__init__.py | 232 +++++ apps/api/src/services/inoreader/__init__.py | 168 ++++ apps/api/src/services/inoreader/process.py | 108 +++ apps/api/src/services/scrapers/__init__.py | 0 .../src/services/scrapers/bluesky/__init__.py | 45 + .../src/services/scrapers/bluesky/config.py | 3 + .../src/services/scrapers/bluesky/scraper.py | 191 +++++ apps/api/src/services/scrapers/common.py | 114 +++ .../src/services/scrapers/douban/__init__.py | 230 +++++ .../src/services/scrapers/general/__init__.py | 40 + .../api/src/services/scrapers/general/base.py | 208 +++++ .../services/scrapers/general/firecrawl.py | 65 ++ .../scrapers/general/firecrawl_client.py | 94 +++ .../src/services/scrapers/general/scraper.py | 86 ++ .../api/src/services/scrapers/general/zyte.py | 78 ++ .../services/scrapers/instagram/__init__.py | 271 ++++++ .../src/services/scrapers/instagram/config.py | 33 + .../src/services/scrapers/reddit/__init__.py | 124 +++ apps/api/src/services/scrapers/scraper.py | 19 + .../src/services/scrapers/scraper_manager.py | 61 ++ .../src/services/scrapers/threads/__init__.py | 191 +++++ .../src/services/scrapers/twitter/__init__.py | 381 +++++++++ .../src/services/scrapers/twitter/config.py | 31 + .../src/services/scrapers/wechat/__init__.py | 102 +++ .../src/services/scrapers/weibo/__init__.py | 54 ++ .../api/src/services/scrapers/weibo/config.py | 5 + .../src/services/scrapers/weibo/scraper.py | 501 +++++++++++ .../services/scrapers/xiaohongshu/__init__.py | 153 ++++ .../scrapers/xiaohongshu/xhs/__init__.py | 2 + .../scrapers/xiaohongshu/xhs/base_crawler.py | 35 + .../scrapers/xiaohongshu/xhs/client.py | 217 +++++ .../services/scrapers/xiaohongshu/xhs/core.py | 225 +++++ .../scrapers/xiaohongshu/xhs/exception.py | 9 + .../scrapers/xiaohongshu/xhs/field.py | 72 ++ .../services/scrapers/xiaohongshu/xhs/help.py | 262 ++++++ .../scrapers/xiaohongshu/xhs/login.py | 132 +++ .../xiaohongshu/xhs/proxy_account_pool.py | 132 +++ 
.../scrapers/xiaohongshu/xhs/utils.py | 146 ++++ .../src/services/scrapers/zhihu/__init__.py | 792 ++++++++++++++++++ .../api/src/services/scrapers/zhihu/config.py | 23 + apps/api/src/services/telegraph/__init__.py | 74 ++ apps/api/src/templates/bluesky_content.jinja2 | 19 + .../templates/bluesky_telegram_text.jinja2 | 1 + apps/api/src/templates/douban_content.jinja2 | 5 + .../src/templates/douban_short_text.jinja2 | 11 + apps/api/src/templates/reddit_content.jinja2 | 7 + .../src/templates/reddit_short_text.jinja2 | 3 + apps/api/src/templates/video_info.jinja2 | 6 + apps/api/src/templates/weibo_content.jinja2 | 11 + .../api/src/templates/weibo_short_text.jinja2 | 5 + .../src/templates/xiaohongshu_content.jinja2 | 10 + .../templates/xiaohongshu_short_text.jinja2 | 2 + apps/api/src/templates/zhihu_content.jinja2 | 47 ++ .../api/src/templates/zhihu_short_text.jinja2 | 11 + apps/telegram-bot/Dockerfile | 50 ++ apps/telegram-bot/core/__init__.py | 0 apps/telegram-bot/core/api_client.py | 34 + apps/telegram-bot/core/config.py | 136 +++ apps/telegram-bot/core/database.py | 37 + apps/telegram-bot/core/handlers/__init__.py | 0 apps/telegram-bot/core/handlers/buttons.py | 118 +++ apps/telegram-bot/core/handlers/messages.py | 58 ++ .../telegram-bot/core/handlers/url_process.py | 225 +++++ apps/telegram-bot/core/main.py | 13 + apps/telegram-bot/core/models/__init__.py | 0 .../core/models/database_model.py | 4 + .../telegram-bot/core/models/telegram_chat.py | 33 + apps/telegram-bot/core/services/__init__.py | 0 apps/telegram-bot/core/services/bot_app.py | 183 ++++ apps/telegram-bot/core/services/constants.py | 40 + .../core/services/message_sender.py | 345 ++++++++ .../templates/social_media_message.jinja2 | 32 + apps/telegram-bot/core/webhook/__init__.py | 0 apps/telegram-bot/core/webhook/server.py | 87 ++ apps/telegram-bot/pyproject.toml | 27 + docker-compose.template.yml | 26 +- .../shared/fastfetchbot_shared/__init__.py | 0 packages/shared/fastfetchbot_shared/config.py | 19 
+ .../fastfetchbot_shared/models/__init__.py | 0 .../fastfetchbot_shared/models/classes.py | 17 + .../models/metadata_item.py | 123 +++ .../models/telegraph_item.py | 58 ++ .../models/url_metadata.py | 50 ++ .../fastfetchbot_shared/utils/__init__.py | 0 .../fastfetchbot_shared/utils/config.py | 55 ++ .../shared/fastfetchbot_shared/utils/image.py | 46 + .../fastfetchbot_shared/utils/logger.py | 17 + .../fastfetchbot_shared/utils/network.py | 200 +++++ .../shared/fastfetchbot_shared/utils/parse.py | 224 +++++ packages/shared/pyproject.toml | 20 + pyproject.toml | 8 +- template.env | 8 + uv.lock | 121 ++- 114 files changed, 8988 insertions(+), 14 deletions(-) create mode 100644 apps/api/Dockerfile create mode 100644 apps/api/pyproject.toml create mode 100644 apps/api/src/__init__.py create mode 100644 apps/api/src/auth.py create mode 100644 apps/api/src/config.py create mode 100644 apps/api/src/database.py create mode 100644 apps/api/src/main.py create mode 100644 apps/api/src/models/__init__.py create mode 100644 apps/api/src/models/database_model.py create mode 100644 apps/api/src/routers/__init__.py create mode 100644 apps/api/src/routers/inoreader.py create mode 100644 apps/api/src/routers/scraper.py create mode 100644 apps/api/src/routers/scraper_routers.py create mode 100644 apps/api/src/routers/wechat.py create mode 100644 apps/api/src/services/__init__.py create mode 100644 apps/api/src/services/amazon/__init__.py create mode 100644 apps/api/src/services/amazon/s3.py create mode 100644 apps/api/src/services/file_export/__init__.py create mode 100644 apps/api/src/services/file_export/audio_transcribe/__init__.py create mode 100644 apps/api/src/services/file_export/document_export/__init__.py create mode 100644 apps/api/src/services/file_export/document_export/pdf_export.py create mode 100644 apps/api/src/services/file_export/video_download/__init__.py create mode 100644 apps/api/src/services/inoreader/__init__.py create mode 100644 
apps/api/src/services/inoreader/process.py create mode 100644 apps/api/src/services/scrapers/__init__.py create mode 100644 apps/api/src/services/scrapers/bluesky/__init__.py create mode 100644 apps/api/src/services/scrapers/bluesky/config.py create mode 100644 apps/api/src/services/scrapers/bluesky/scraper.py create mode 100644 apps/api/src/services/scrapers/common.py create mode 100644 apps/api/src/services/scrapers/douban/__init__.py create mode 100644 apps/api/src/services/scrapers/general/__init__.py create mode 100644 apps/api/src/services/scrapers/general/base.py create mode 100644 apps/api/src/services/scrapers/general/firecrawl.py create mode 100644 apps/api/src/services/scrapers/general/firecrawl_client.py create mode 100644 apps/api/src/services/scrapers/general/scraper.py create mode 100644 apps/api/src/services/scrapers/general/zyte.py create mode 100644 apps/api/src/services/scrapers/instagram/__init__.py create mode 100644 apps/api/src/services/scrapers/instagram/config.py create mode 100644 apps/api/src/services/scrapers/reddit/__init__.py create mode 100644 apps/api/src/services/scrapers/scraper.py create mode 100644 apps/api/src/services/scrapers/scraper_manager.py create mode 100644 apps/api/src/services/scrapers/threads/__init__.py create mode 100644 apps/api/src/services/scrapers/twitter/__init__.py create mode 100644 apps/api/src/services/scrapers/twitter/config.py create mode 100644 apps/api/src/services/scrapers/wechat/__init__.py create mode 100644 apps/api/src/services/scrapers/weibo/__init__.py create mode 100644 apps/api/src/services/scrapers/weibo/config.py create mode 100644 apps/api/src/services/scrapers/weibo/scraper.py create mode 100644 apps/api/src/services/scrapers/xiaohongshu/__init__.py create mode 100644 apps/api/src/services/scrapers/xiaohongshu/xhs/__init__.py create mode 100644 apps/api/src/services/scrapers/xiaohongshu/xhs/base_crawler.py create mode 100644 apps/api/src/services/scrapers/xiaohongshu/xhs/client.py create 
mode 100644 apps/api/src/services/scrapers/xiaohongshu/xhs/core.py create mode 100644 apps/api/src/services/scrapers/xiaohongshu/xhs/exception.py create mode 100644 apps/api/src/services/scrapers/xiaohongshu/xhs/field.py create mode 100644 apps/api/src/services/scrapers/xiaohongshu/xhs/help.py create mode 100644 apps/api/src/services/scrapers/xiaohongshu/xhs/login.py create mode 100644 apps/api/src/services/scrapers/xiaohongshu/xhs/proxy_account_pool.py create mode 100644 apps/api/src/services/scrapers/xiaohongshu/xhs/utils.py create mode 100644 apps/api/src/services/scrapers/zhihu/__init__.py create mode 100644 apps/api/src/services/scrapers/zhihu/config.py create mode 100644 apps/api/src/services/telegraph/__init__.py create mode 100644 apps/api/src/templates/bluesky_content.jinja2 create mode 100644 apps/api/src/templates/bluesky_telegram_text.jinja2 create mode 100644 apps/api/src/templates/douban_content.jinja2 create mode 100644 apps/api/src/templates/douban_short_text.jinja2 create mode 100644 apps/api/src/templates/reddit_content.jinja2 create mode 100644 apps/api/src/templates/reddit_short_text.jinja2 create mode 100644 apps/api/src/templates/video_info.jinja2 create mode 100644 apps/api/src/templates/weibo_content.jinja2 create mode 100644 apps/api/src/templates/weibo_short_text.jinja2 create mode 100644 apps/api/src/templates/xiaohongshu_content.jinja2 create mode 100644 apps/api/src/templates/xiaohongshu_short_text.jinja2 create mode 100644 apps/api/src/templates/zhihu_content.jinja2 create mode 100644 apps/api/src/templates/zhihu_short_text.jinja2 create mode 100644 apps/telegram-bot/Dockerfile create mode 100644 apps/telegram-bot/core/__init__.py create mode 100644 apps/telegram-bot/core/api_client.py create mode 100644 apps/telegram-bot/core/config.py create mode 100644 apps/telegram-bot/core/database.py create mode 100644 apps/telegram-bot/core/handlers/__init__.py create mode 100644 apps/telegram-bot/core/handlers/buttons.py create mode 100644 
apps/telegram-bot/core/handlers/messages.py create mode 100644 apps/telegram-bot/core/handlers/url_process.py create mode 100644 apps/telegram-bot/core/main.py create mode 100644 apps/telegram-bot/core/models/__init__.py create mode 100644 apps/telegram-bot/core/models/database_model.py create mode 100644 apps/telegram-bot/core/models/telegram_chat.py create mode 100644 apps/telegram-bot/core/services/__init__.py create mode 100644 apps/telegram-bot/core/services/bot_app.py create mode 100644 apps/telegram-bot/core/services/constants.py create mode 100644 apps/telegram-bot/core/services/message_sender.py create mode 100644 apps/telegram-bot/core/templates/social_media_message.jinja2 create mode 100644 apps/telegram-bot/core/webhook/__init__.py create mode 100644 apps/telegram-bot/core/webhook/server.py create mode 100644 apps/telegram-bot/pyproject.toml create mode 100644 packages/shared/fastfetchbot_shared/__init__.py create mode 100644 packages/shared/fastfetchbot_shared/config.py create mode 100644 packages/shared/fastfetchbot_shared/models/__init__.py create mode 100644 packages/shared/fastfetchbot_shared/models/classes.py create mode 100644 packages/shared/fastfetchbot_shared/models/metadata_item.py create mode 100644 packages/shared/fastfetchbot_shared/models/telegraph_item.py create mode 100644 packages/shared/fastfetchbot_shared/models/url_metadata.py create mode 100644 packages/shared/fastfetchbot_shared/utils/__init__.py create mode 100644 packages/shared/fastfetchbot_shared/utils/config.py create mode 100644 packages/shared/fastfetchbot_shared/utils/image.py create mode 100644 packages/shared/fastfetchbot_shared/utils/logger.py create mode 100644 packages/shared/fastfetchbot_shared/utils/network.py create mode 100644 packages/shared/fastfetchbot_shared/utils/parse.py create mode 100644 packages/shared/pyproject.toml diff --git a/apps/api/Dockerfile b/apps/api/Dockerfile new file mode 100644 index 0000000..ef4d33c --- /dev/null +++ b/apps/api/Dockerfile 
@@ -0,0 +1,88 @@ + +# `python-base` sets up all our shared environment variables +FROM python:3.12-slim AS python-base + +ENV PYTHONUNBUFFERED=1 \ + PYTHONDONTWRITEBYTECODE=1 \ + # uv settings + UV_PROJECT_ENVIRONMENT="/opt/pysetup/.venv" \ + UV_COMPILE_BYTECODE=1 \ + UV_LINK_MODE=copy \ + # paths + PYSETUP_PATH="/opt/pysetup" \ + VENV_PATH="/opt/pysetup/.venv" \ + PLAYWRIGHT_BROWSERS_PATH="/opt/playwright-browsers" + +# prepend venv to path +ENV PATH="$VENV_PATH/bin:$PATH" + + +# `builder-base` stage is used to build deps + create our virtual environment +FROM python-base AS builder-base + +# install uv from the official image +COPY --from=ghcr.io/astral-sh/uv:0.10.4 /uv /usr/local/bin/uv + +RUN apt-get update \ + && apt-get install --no-install-recommends -y \ + curl \ + ffmpeg \ + libmagic1 \ + # deps for weasyprint + libpango-1.0-0 \ + libpangoft2-1.0-0 \ + libjpeg-dev \ + libopenjp2-7-dev \ + libffi-dev \ + build-essential \ + fonts-wqy-microhei \ + fonts-wqy-zenhei \ + fonts-noto-cjk \ + fonts-noto-cjk-extra + +# copy workspace files for dependency resolution +WORKDIR $PYSETUP_PATH +COPY pyproject.toml uv.lock ./ +COPY packages/ packages/ +COPY apps/api/ apps/api/ + +# install runtime deps +RUN uv sync --frozen --no-dev --no-install-project --package fastfetchbot-api + +# install the browser dependencies for playwright +RUN uv run playwright install --with-deps + + +# `production` image used for runtime +FROM python-base AS production +ENV FASTAPI_ENV=production +ENV PYTHONPATH=/app/apps/api:$PYTHONPATH +RUN apt-get update \ + && apt-get install --no-install-recommends -y \ + curl \ + ffmpeg \ + libmagic1 \ + # deps for weasyprint + libpango-1.0-0 \ + libpangoft2-1.0-0 \ + libjpeg-dev \ + libopenjp2-7-dev \ + libffi-dev \ + fonts-wqy-microhei \ + fonts-wqy-zenhei \ + fonts-noto-cjk \ + fonts-noto-cjk-extra \ + libnss3 \ + libnspr4 \ + libatk1.0-0 \ + libatk-bridge2.0-0 \ + libcups2 \ + libatspi2.0-0 \ + libxcomposite1 \ + libxdamage1 +COPY --from=builder-base 
api_key_query = APIKeyQuery(name=API_KEY_NAME, auto_error=False)


def verify_key(input_key: str, true_key: str) -> None:
    """Constant-time comparison of a caller-supplied API key.

    Args:
        input_key: key extracted from the request (None when absent,
            because the APIKeyQuery above uses auto_error=False).
        true_key: the configured server-side key.

    Raises:
        HTTPException: 401 when the key is missing or does not match.
    """
    # BUG FIX: the original tested `api_key_query is None`, i.e. the
    # module-level APIKeyQuery *object*, which is never None.  The intent
    # is to reject a missing key; previously a missing key reached
    # secrets.compare_digest(None, ...) and raised TypeError (HTTP 500
    # instead of 401).
    if input_key is None or not secrets.compare_digest(input_key, true_key):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="API Key Invalid"
        )


def verify_api_key(api_key_query: str = Security(api_key_query)) -> None:
    """FastAPI dependency: validate the API key query parameter."""
    verify_key(api_key_query, API_KEY)
env.get("DOWNLOAD_VIDEO_TIMEOUT", 600) + +# Services environment variables +templates_directory = os.path.join(current_directory, "templates") +JINJA2_ENV = Environment( + loader=FileSystemLoader(templates_directory), lstrip_blocks=True, trim_blocks=True +) +TEMPLATE_LANGUAGE = env.get( + "TEMPLATE_LANGUAGE", "zh_CN" +) # It is a workaround for translation system + +# X-RapidAPI (for instagram) +X_RAPIDAPI_KEY = env.get("X_RAPIDAPI_KEY", None) + +# Twitter +TWITTER_EMAIL = env.get("TWITTER_EMAIL", None) +TWITTER_PASSWORD = env.get("TWITTER_PASSWORD", None) +TWITTER_USERNAME = env.get("TWITTER_USERNAME", None) +TWITTER_CT0 = env.get("TWITTER_CT0", None) +TWITTER_AUTH_TOKEN = env.get("TWITTER_AUTH_TOKEN", None) +TWITTER_COOKIES = { + "ct0": TWITTER_CT0, + "auth_token": TWITTER_AUTH_TOKEN, +} + +# Bluesky +BLUESKY_USERNAME = env.get("BLUESKY_USERNAME", None) +BLUESKY_PASSWORD = env.get("BLUESKY_PASSWORD", None) + +# Weibo +WEIBO_COOKIES = env.get("WEIBO_COOKIES", None) + +# Xiaohongshu +XIAOHONGSHU_A1 = env.get("XIAOHONGSHU_A1", None) +XIAOHONGSHU_WEBID = env.get("XIAOHONGSHU_WEBID", None) +XIAOHONGSHU_WEBSESSION = env.get("XIAOHONGSHU_WEBSESSION", None) +XIAOHONGSHU_COOKIES = { + "a1": XIAOHONGSHU_A1, + "web_id": XIAOHONGSHU_WEBID, + "web_session": XIAOHONGSHU_WEBSESSION, +} +XHS_PHONE_LIST = env.get("XHS_PHONE_LIST", "").split(",") +XHS_IP_PROXY_LIST = env.get("XHS_IP_PROXY_LIST", "").split(",") +XHS_ENABLE_IP_PROXY = get_env_bool(env, "XHS_ENABLE_IP_PROXY", False) +XHS_SAVE_LOGIN_STATE = get_env_bool(env, "XHS_SAVE_LOGIN_STATE", True) + +# Zhihu +FXZHIHU_HOST = env.get("FXZHIHU_HOST", "fxzhihu.com") + +zhihu_cookie_path = os.path.join(conf_dir, "zhihu_cookies.json") +if os.path.exists(zhihu_cookie_path): + try: + with open(zhihu_cookie_path, "r") as f: + ZHIHU_COOKIES_JSON = json.load(f) + except json.JSONDecodeError: + print("Error: The file is not in a valid JSON format.") + ZHIHU_COOKIES_JSON = None + except FileNotFoundError: + print("Error: The file does not 
exist.") + ZHIHU_COOKIES_JSON = None +else: + print("Error: We cannot find it.") + ZHIHU_COOKIES_JSON = None + +# Reddit +REDDIT_CLIENT_ID = env.get("REDDIT_CLIENT_ID", None) +REDDIT_CLIENT_SECRET = env.get("REDDIT_CLIENT_SECRET", None) +REDDIT_PASSWORD = env.get("REDDIT_PASSWORD", None) +REDDIT_USERNAME = env.get("REDDIT_USERNAME", None) + +# AWS storage +AWS_STORAGE_ON = get_env_bool(env, "AWS_STORAGE_ON", False) +AWS_ACCESS_KEY_ID = env.get("AWS_ACCESS_KEY_ID", None) +AWS_SECRET_ACCESS_KEY = env.get("AWS_SECRET_ACCESS_KEY", None) +AWS_S3_BUCKET_NAME = env.get("AWS_S3_BUCKET_NAME", "") +AWS_REGION_NAME = env.get("AWS_REGION_NAME", "") +AWS_DOMAIN_HOST = env.get("AWS_DOMAIN_HOST", None) +if not (AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY and AWS_S3_BUCKET_NAME): + AWS_STORAGE_ON = False +INOREADER_APP_ID = env.get("INOREADER_APP_ID", None) +INOREADER_APP_KEY = env.get("INOREADER_APP_KEY", None) +INOREADER_EMAIL = env.get("INOREADER_EMAIL", None) +INOREADER_PASSWORD = env.get("INOREADER_PASSWORD", None) + +# Open AI API +OPENAI_API_KEY = env.get("OPENAI_API_KEY", None) + +# General webpage scraping +GENERAL_SCRAPING_ON = get_env_bool(env, "GENERAL_SCRAPING_ON", False) +GENERAL_SCRAPING_API = env.get("GENERAL_SCRAPING_API", "FIRECRAWL") + +# Firecrawl API +FIRECRAWL_API_URL = os.getenv("FIRECRAWL_API_URL", "") +FIRECRAWL_API_KEY = os.getenv("FIRECRAWL_API_KEY", "") +FIRECRAWL_WAIT_FOR = int(env.get("FIRECRAWL_WAIT_FOR", 3000)) # milliseconds to wait for JS rendering + + +# Zyte API +ZYTE_API_KEY = env.get("ZYTE_API_KEY", None) + +# Locale directories environment variables +localedir = os.path.join(os.path.dirname(__file__), "locale") +translation = gettext.translation("messages", localedir=localedir, fallback=True) +_ = translation.gettext + +# Utils environment variables +HTTP_REQUEST_TIMEOUT = env.get("HTTP_REQUEST_TIMEOUT", 30) + +# Telegram Bot callback URL (for inter-service communication) +TELEGRAM_BOT_CALLBACK_URL = env.get("TELEGRAM_BOT_CALLBACK_URL", 
"http://telegram-bot:10451") diff --git a/apps/api/src/database.py b/apps/api/src/database.py new file mode 100644 index 0000000..5a4387e --- /dev/null +++ b/apps/api/src/database.py @@ -0,0 +1,37 @@ +from typing import Optional, Union, List + +from motor.motor_asyncio import AsyncIOMotorClient +from beanie import init_beanie, Document, Indexed + +from src.config import MONGODB_URL +from src.models.database_model import document_list +from fastfetchbot_shared.utils.logger import logger + + +async def startup() -> None: + client = AsyncIOMotorClient(MONGODB_URL) + await init_beanie(database=client["telegram_bot"], document_models=document_list) + + +async def shutdown() -> None: + pass + + +async def save_instances(instances: Union[Document, List[Document]], *args) -> None: + if instances is None: + raise TypeError("instances must be a Model or a list of Model") + + if isinstance(instances, Document): + instance_type = type(instances) + await instance_type.insert(instances) + elif isinstance(instances, list): + instance_type = type(instances[0]) + await instance_type.insert_many(instances) + else: + raise TypeError("instances must be a Model or a list of Model") + + for arg in args: + if not isinstance(arg, Document): + raise TypeError("args must be a Model") + instance_type = type(arg) + await instance_type.insert_one(arg) diff --git a/apps/api/src/main.py b/apps/api/src/main.py new file mode 100644 index 0000000..2a712be --- /dev/null +++ b/apps/api/src/main.py @@ -0,0 +1,55 @@ +import sentry_sdk + +from fastapi import FastAPI, Request +from contextlib import asynccontextmanager +from starlette.middleware.base import BaseHTTPMiddleware + +from src import database +from src.routers import inoreader, scraper_routers, scraper +from src.config import DATABASE_ON +from fastfetchbot_shared.utils.logger import logger + +SENTRY_DSN = "" + +# https://docs.sentry.io/platforms/python/guides/fastapi/ +sentry_sdk.init( + dsn=SENTRY_DSN, + # Set traces_sample_rate to 1.0 to 
class Metadata(Document):
    """Scraped-page metadata persisted in MongoDB via Beanie."""

    title: str = Field(default="untitled")
    message_type: MessageType = MessageType.SHORT
    url: str
    author: Optional[str] = None
    author_url: Optional[str] = None
    text: Optional[str] = None
    # BUG FIX: `Field(ge=0)` without a default makes the field *required*
    # at validation time, even though both lengths are computed in the
    # before-insert hook below — so Metadata(**obj) failed whenever the
    # caller omitted them.  Give an explicit None default.
    text_length: Optional[int] = Field(default=None, ge=0)
    content: Optional[str] = None
    content_length: Optional[int] = Field(default=None, ge=0)
    category: Optional[str] = None
    source: Optional[str] = None
    media_files: Optional[list[MediaFile]] = None
    telegraph_url: Optional[str] = None
    # NOTE(review): kept naive-UTC for compatibility with existing stored
    # documents; datetime.utcnow is deprecated in Python 3.12 — consider
    # datetime.now(timezone.utc) once stored data can be migrated.
    timestamp: datetime = Field(default_factory=datetime.utcnow)
    scrape_status: bool = False

    @before_event(Insert)
    def get_text_length(self):
        """Compute plain-text lengths of `text`/`content` just before insert."""
        self.text_length = get_html_text_length(self.text)
        self.content_length = get_html_text_length(self.content)

    @staticmethod
    def from_dict(obj: Any) -> "Metadata":
        """Build a Metadata document from a plain dict (pydantic-validated)."""
        assert isinstance(obj, dict)
        return Metadata(**obj)


# Registered with Beanie at startup (see src/database.py).
document_list = [Metadata]
default_telegram_channel_id) + await process_inoreader_data(data=data, use_inoreader_content=True, telegram_channel_id=telegram_channel_id) + return "ok" diff --git a/apps/api/src/routers/scraper.py b/apps/api/src/routers/scraper.py new file mode 100644 index 0000000..b02be9c --- /dev/null +++ b/apps/api/src/routers/scraper.py @@ -0,0 +1,37 @@ +import asyncio + +from fastapi import APIRouter +from fastapi.requests import Request + +from src.config import API_KEY_NAME +from src.services.scrapers.common import InfoExtractService +from fastapi import Security +from src.auth import verify_api_key +from fastfetchbot_shared.utils.logger import logger +from fastfetchbot_shared.utils.parse import get_url_metadata + +router = APIRouter(prefix="/scraper") + + +@router.post("/getItem", dependencies=[Security(verify_api_key)]) +async def get_item_route(request: Request): + logger.debug("A scraper getItem request received") + query_params = dict(request.query_params) + url = query_params.pop("url") + ban_list = query_params.pop("ban_list", None) + logger.debug(f"get_item_route: url: {url}, query_params: {query_params}") + if API_KEY_NAME in query_params: + query_params.pop(API_KEY_NAME) + url_metadata = await get_url_metadata(url, ban_list) + item = InfoExtractService(url_metadata, **query_params) + result = await item.get_item() + logger.debug(f"getItem result: {result}") + return result + + +@router.post("/getUrlMetadata", dependencies=[Security(verify_api_key)]) +async def get_url_metadata_route(request: Request): + url = request.query_params.get("url") + ban_list = request.query_params.get("ban_list") + url_metadata = await get_url_metadata(url, ban_list) + return url_metadata.to_dict() diff --git a/apps/api/src/routers/scraper_routers.py b/apps/api/src/routers/scraper_routers.py new file mode 100644 index 0000000..66316c7 --- /dev/null +++ b/apps/api/src/routers/scraper_routers.py @@ -0,0 +1,6 @@ +from .wechat import router as wechat_router + + +scraper_routers = [ + 
router = APIRouter(prefix="/wechat")


@router.post("/gzh", dependencies=[Security(verify_api_key)])
async def wechat_gzh_scrape(request: Request):
    """Scrape a WeChat official-account (gzh) article.

    Accepts either a `url` query parameter, or a JSON body that is a full
    UrlMetadata dict.  Returns the scraped item, or an error string when
    neither is provided.
    """
    url = request.query_params.get("url")
    if url:
        url_metadata = UrlMetadata.from_dict({
            "url": url,
            "type": "social_media",
            "source": "wechat",
        })
    else:
        # BUG FIX: Request.json() is a coroutine and was not awaited; the
        # coroutine object is always truthy, so UrlMetadata.from_dict was
        # handed a coroutine instead of the parsed body.
        customized_url_metadata = await request.json()
        if customized_url_metadata:
            url_metadata = UrlMetadata.from_dict(customized_url_metadata)
        else:
            return "url or url metadata not found"
    item = InfoExtractService(url_metadata)
    result = await item.get_item()
    return result
image_url_host = (
    AWS_DOMAIN_HOST
    if AWS_DOMAIN_HOST
    else f"{AWS_S3_BUCKET_NAME}.s3.{AWS_REGION_NAME}.amazonaws.com"
)


async def download_and_upload(url: str, referer: str = None, suite: str = "test") -> str:
    """Download a remote file to local disk, upload it to S3, and clean up.

    Returns the public S3 URL, or "" when the download failed.
    """
    urlparser = urlparse(url)
    file_name = (urlparser.netloc + urlparser.path).replace("/", "-")
    local_path = await download_file_to_local(url=url, referer=referer, file_name=file_name)
    # BUG FIX: the failure check originally ran *after* Path() conversion,
    # and Path("") is PosixPath('.') which is truthy — so a failed download
    # slipped through.  Check the raw return value first.
    if not local_path:
        return ""
    local_path = Path(local_path)
    file_name = local_path.name
    s3_path = await upload(
        suite=suite,
        staging_path=local_path,
        file_name=file_name,
    )
    await aiofiles.os.remove(local_path)
    return s3_path


async def upload(
    staging_path: Path,
    bucket: str = AWS_S3_BUCKET_NAME,
    suite: str = "test",
    release: str = None,
    file_name: str = None,
) -> str:
    """Upload a local file to S3 under ``{suite}/{release}/{file_name}``.

    Returns the (path-quoted) public URL, or "" on failure.
    """
    # BUG FIX: the original default `datetime.now().strftime(...)` was
    # evaluated once at import time, so long-running workers filed every
    # upload under the process start date.  Compute per call instead.
    if release is None:
        release = datetime.now().strftime("%Y-%m-%d")
    if not file_name:
        file_name = uuid.uuid4().hex
    blob_s3_key = f"{suite}/{release}/{file_name}"
    async with session.client("s3") as s3:
        try:
            with staging_path.open("rb") as spfp:
                logger.info(f"Uploading {blob_s3_key}")
                await s3.upload_fileobj(
                    spfp,
                    bucket,
                    blob_s3_key,
                )
            logger.info(f"Uploaded {file_name} to {suite}/{release}")
        except Exception as e:
            logger.error(f"Failed to upload {file_name} to {suite}/{release}, {e}")
            return ""
    image_url = f"https://{image_url_host}/{blob_s3_key}"
    # Quote only the path component so the scheme/host stay intact.
    urlparser = urlparse(image_url)
    quoted_url = urlparser.scheme + "://" + urlparser.netloc + quote(urlparser.path)
    return quoted_url
FILE_EXPORTER_URL, DOWNLOAD_VIDEO_TIMEOUT +from fastfetchbot_shared.utils.logger import logger +from fastfetchbot_shared.utils.parse import wrap_text_into_html + +TRANSCRIBE_MODEL = "whisper-1" +SEGMENT_LENGTH = 5 * 60 + + +class AudioTranscribe: + def __init__(self, audio_file: str): + self.audio_file = audio_file + + async def transcribe(self): + return await self._get_audio_text(self.audio_file) + + @staticmethod + async def _get_audio_text(audio_file: str): + async with httpx.AsyncClient() as client: + body = { + "audio_file": audio_file, + "openai_api_key": OPENAI_API_KEY, + } + request_url = FILE_EXPORTER_URL + "/transcribe" + response = await client.post( + url=request_url, json=body, timeout=DOWNLOAD_VIDEO_TIMEOUT + ) + transcript = response.json().get("transcript") + return transcript diff --git a/apps/api/src/services/file_export/document_export/__init__.py b/apps/api/src/services/file_export/document_export/__init__.py new file mode 100644 index 0000000..282167d --- /dev/null +++ b/apps/api/src/services/file_export/document_export/__init__.py @@ -0,0 +1,10 @@ +from . 
current_directory = os.path.dirname(os.path.abspath(__file__))

PDF_STYLESHEET = os.path.join(current_directory, "pdf_export.css")


async def upload_file_to_s3(output_filename):
    """Upload an exported document to S3 under the `documents` suite."""
    return await upload_to_s3(
        staging_path=output_filename,
        suite="documents",
        file_name=output_filename.name,
    )


class PdfExport:
    """Render an HTML string to PDF via the external file-exporter service."""

    def __init__(self, title: str, html_string: str = None):
        self.title = title
        self.html_string = html_string

    async def export(self, method: str = "file") -> str:
        """Export to PDF; returns the output filename (S3 URL when enabled).

        Args:
            method: "file" writes the HTML to a shared temp file and sends
                its path; "string" sends the HTML inline.
        """
        body = {
            "method": method
        }
        html_string = self.wrap_html_string(self.html_string)
        html_file = None
        if method == "string":
            # BUG FIX: the original `body["html_string"] = html_string,`
            # had a trailing comma, silently wrapping the payload in a
            # 1-tuple.
            body["html_string"] = html_string
            logger.debug(
                f"""
                html_string: {html_string}
                """
            )
        elif method == "file":
            filename = f"{self.title}-{uuid.uuid4()}.html"
            filename = os.path.join(TEMP_DIR, filename)
            async with aiofiles.open(
                filename, "w", encoding="utf-8"
            ) as f:
                await f.write(html_string)
            html_file = filename
            logger.debug(html_file)
            body["html_file"] = html_file
        output_filename = f"{self.title}-{uuid.uuid4()}.pdf"
        body["output_filename"] = output_filename

        async with httpx.AsyncClient() as client:
            request_url = FILE_EXPORTER_URL + "/pdfExport"
            logger.info(f"requesting pdf export from pdf server: {body}")
            resp = await client.post(
                request_url, json=body, timeout=DOWNLOAD_VIDEO_TIMEOUT
            )
            output_filename = resp.json().get("output_filename")
            logger.info(f"pdf export success: {output_filename}")
            # BUG FIX: `html_file` was unbound for method == "string" and
            # aiofiles.os.remove then raised NameError; only remove the
            # temp file when one was actually written.
            if html_file is not None:
                await aiofiles.os.remove(html_file)
            if AWS_STORAGE_ON:
                local_filename = output_filename
                output_filename = await upload_file_to_s3(Path(output_filename))
                await aiofiles.os.remove(local_filename)
            return output_filename

    @staticmethod
    def wrap_html_string(html_string: str) -> str:
        """Wrap a fragment in a minimal document and strip inline styles."""
        # NOTE(review): the skeleton literal was garbled in the patch text
        # (tags stripped); a minimal <html><body> document is required for
        # soup.body below — confirm against the original source.
        soup = BeautifulSoup(
            "<html><head></head>"
            "<body></body></html>",
            "html.parser",
        )
        soup.body.append(BeautifulSoup(html_string, "html.parser"))
        for tag in soup.find_all(True):
            if "style" in tag.attrs:
                del tag["style"]
        for style_tag in soup.find_all("style"):
            style_tag.decompose()
        return soup.prettify()
True, + audio_only: bool = False, + hd: bool = False, + transcribe: bool = False, + **kwargs, + ): + self.extractor = category + self.url = url + self.author_url = "" + self.download = download + self.audio_only = audio_only + self.transcribe = transcribe + self.hd = hd + self.message_type = MessageType.SHORT + self.file_path = None + # metadata variables + self.category = category + self.media_files = [] + # auxiliary variables + self.created = None + self.duration = None + + @classmethod + async def create(cls, *args, **kwargs): + instance = cls(*args, **kwargs) + instance.url = await instance._parse_url(instance.url) + return instance + + async def get_item(self) -> dict: + self.url = await self._parse_url(self.url) + await self.get_video() + return self.to_dict() + + async def get_video(self) -> None: + content_info = await self.get_video_info() + self.file_path = content_info["file_path"] + video_info_funcs = { + "youtube": self._youtube_info_parse, + "bilibili": self._bilibili_info_parse, + } + meta_info = video_info_funcs[self.extractor](content_info) + self._video_info_formatting(meta_info) + # AI transcribe + if self.transcribe: + audio_content_info = await self.get_video_info(audio_only=True) + audio_file_path = audio_content_info["file_path"] + audio_transcribe = AudioTranscribe(audio_file_path) + transcribe_text = await audio_transcribe.transcribe() + if self.download is False: + self.message_type = MessageType.LONG + self.text += "\nAI全文摘录:" + transcribe_text + self.content += "


    " + wrap_text_into_html(transcribe_text) + + async def _parse_url(self, url: str) -> str: + async def _get_redirected_url(original_url: str) -> str: + async with httpx.AsyncClient(follow_redirects=False) as client: + resp = await client.get(original_url) + if resp.status_code == 200: + original_url = resp.url + elif resp.status_code == 302: + original_url = resp.headers["Location"] + return original_url + + def _remove_youtube_link_tracing(original_url: str) -> str: + original_url_parser = urlparse(original_url) + original_url_hostname = str(original_url_parser.hostname) + + if "youtu.be" in original_url_hostname: + # remove all queries + original_url = original_url.split("?")[0] + if "youtube.com" in original_url_hostname: + # remove all queries except "?v=" part + original_url = original_url_parser.scheme + "://" + original_url_parser.netloc + original_url_parser.path + if original_url_parser.query: + v_part_query = [item for item in original_url_parser.query.split("&") if "v=" in item] + if v_part_query: + original_url += "?" 
+ v_part_query[0] + return original_url + + def _remove_bilibili_link_tracing(original_url: str) -> str: + original_url_parser = urlparse(original_url) + original_url_hostname = str(original_url_parser.hostname) + query_dict = parse_qs(original_url_parser.query) + bilibili_p_query_string = "?p=" + query_dict["p"][0] if 'p' in query_dict else "" + + if "bilibili.com" in original_url_hostname: + original_url = original_url_parser.scheme + "://" + original_url_parser.netloc + original_url_parser.path + return original_url + bilibili_p_query_string + + logger.info(f"parsing original video url: {url} for {self.extractor}") + + url_parser = urlparse(url) + url_hostname = str(url_parser.hostname) + + if self.extractor == "bilibili": + if "b23.tv" in url_hostname: + url = await _get_redirected_url(url) + if "m.bilibili.com" in url_hostname: + url = url.replace("m.bilibili.com", "www.bilibili.com") + url = _remove_bilibili_link_tracing(url) + elif self.extractor == "youtube": + if "youtu.be" in url_hostname: + url = await _get_redirected_url(url) + url = _remove_youtube_link_tracing(url) + + logger.info(f"parsed video url: {url} for {self.extractor}") + return url + + async def get_video_info( + self, + url: str = None, + download: bool = None, + extractor: str = None, + audio_only: bool = None, + hd: bool = None, + ) -> dict: + """ + make a request to youtube-dl server to get video info + :return: video info dict + """ + if url is None: + url = self.url + if download is None: + download = self.download + if extractor is None: + extractor = self.extractor + if audio_only is None: + audio_only = self.audio_only + if hd is None: + hd = self.hd + async with httpx.AsyncClient() as client: + body = { + "url": url, + "download": download, + "extractor": extractor, + "audio_only": audio_only, + "hd": hd, + } + request_url = FILE_EXPORTER_URL + "/videoDownload" + logger.info(f"requesting video info from youtube-dl server: {body}") + if download is True: + logger.info(f"video 
downloading... it may take a while") + if hd is True: + logger.info(f"downloading HD video, it may take longer") + elif audio_only is True: + logger.info(f"downloading audio only") + logger.debug(f"downloading video timeout: {DOWNLOAD_VIDEO_TIMEOUT}") + resp = await client.post( + request_url, json=body, timeout=DOWNLOAD_VIDEO_TIMEOUT + ) + content_info = resp.json().get("content_info") + file_path = resp.json().get("file_path") + content_info["file_path"] = file_path + return content_info + + def _video_info_formatting(self, meta_info: dict): + self.title = meta_info["title"] + self.author = meta_info["author"] + self.author_url = meta_info["author_url"] + if len(meta_info["description"]) > 800: + meta_info["description"] = meta_info["description"][:800] + "..." + self.created = meta_info["upload_date"] + self.duration = meta_info["duration"] + self.text = video_info_template.render( + data={ + "url": self.url, + "title": self.title, + "author": self.author, + "author_url": self.author_url, + "duration": self.duration, + "created": self.created, + "playback_data": meta_info["playback_data"], + "description": meta_info["description"], + } + ) + self.content = self.text.replace("\n", "
    ") + if self.download: + media_type = "video" + if self.audio_only: + media_type = "audio" + self.media_files = [MediaFile(media_type, self.file_path, "")] + + @staticmethod + def _youtube_info_parse(video_info: dict) -> dict: + return { + "id": video_info["id"], + "title": video_info["title"], + "author": video_info["uploader"], + "author_url": video_info["uploader_url"] or video_info["channel_url"], + "description": video_info["description"], + "playback_data": f"视频播放量:{video_info['view_count']} 评论数:{video_info['comment_count']}", + "author_avatar": video_info["thumbnail"], + "upload_date": str(video_info["upload_date"]), + "duration": second_to_time(round(video_info["duration"])), + } + + @staticmethod + def _bilibili_info_parse(video_info: dict) -> dict: + return { + "id": video_info["id"], + "title": video_info["title"], + "author": video_info["uploader"], + "author_url": "https://space.bilibili.com/" + + str(video_info["uploader_id"]), + "author_avatar": video_info["thumbnail"], + "ext": video_info["ext"], + "description": video_info["description"], + "playback_data": f"视频播放量:{video_info['view_count']} 弹幕数:{video_info['comment_count']} 点赞数:{video_info['like_count']}", + "upload_date": unix_timestamp_to_utc(video_info["timestamp"]), + "duration": second_to_time(round(video_info["duration"])), + } diff --git a/apps/api/src/services/inoreader/__init__.py b/apps/api/src/services/inoreader/__init__.py new file mode 100644 index 0000000..1343079 --- /dev/null +++ b/apps/api/src/services/inoreader/__init__.py @@ -0,0 +1,168 @@ +from typing import Optional +from urllib.parse import quote + +import httpx +from bs4 import BeautifulSoup +import jmespath +from httpx import Response + +from fastfetchbot_shared.models.metadata_item import MetadataItem, MediaFile, MessageType +from fastfetchbot_shared.utils.network import HEADERS +from fastfetchbot_shared.utils.logger import logger +from fastfetchbot_shared.utils.parse import get_html_text_length +from src.config import 
( + INOREADER_APP_ID, + INOREADER_APP_KEY, + INOREADER_EMAIL, + INOREADER_PASSWORD, +) + +INOREADER_CONTENT_URL = "https://www.inoreader.com/reader/api/0/stream/contents/" +TAG_PATH = "user/-/label/" +OTHER_PATH = "user/-/state/com.google/" +INOREADER_LOGIN_URL = "https://www.inoreader.com/accounts/ClientLogin" + + +class Inoreader(MetadataItem): + def __init__(self, url: str = None, data: dict = None, **kwargs): + if url: + self.url = url + if data: + self.title = data.get("title", "") + self.message = data.get("message", "") + self.author = data.get("author", "") + self.author_url = data.get("author_url", "") + self.category = data.get("category", "") + self.raw_content = data.get("content", "") + self.content = self.raw_content + if kwargs.get("category"): + self.category = kwargs["category"] + self.media_files = [] + self.message_type = MessageType.LONG + + def _from_data(self, data: dict): + self.title = data.get("title", "") + self.message = data.get("message", "") + self.author = data.get("author", "") + self.author_url = data.get("author_url", "") + self.category = data.get("category", "") + self.raw_content = data.get("content", "") + self.content = self.raw_content + + async def get_item(self, api: bool = False) -> dict: + if api: + data = await self.get_api_item_data() + self._resolve_media_files() + if get_html_text_length(self.content) < 400: + self.message_type = MessageType.SHORT + metadata_dict = self.to_dict() + metadata_dict["message"] = self.message + return metadata_dict + + def _resolve_media_files(self): + soup = BeautifulSoup(self.raw_content, "html.parser") + for img in soup.find_all("img"): + self.media_files.append(MediaFile(url=img["src"], media_type="image")) + img.extract() + for video in soup.find_all("video"): + self.media_files.append(MediaFile(url=video["src"], media_type="video")) + video.extract() + for tags in soup.find_all(["p", "span"]): + tags.unwrap() + self.text = str(soup) + self.text = '' + self.author + ": " + self.text + 
+ @staticmethod + def get_stream_id( + stream_type: str = "broadcast", tag: str = None, feed: str = None + ) -> str: + if stream_type == "feed": + stream_id = feed + elif stream_type == "tag": + stream_id = TAG_PATH + tag + else: + stream_id = OTHER_PATH + stream_type + stream_id = quote(stream_id) + return stream_id + + @staticmethod + async def mark_all_as_read(stream_id: str, timestamp: int = 0) -> None: + request_url = "https://www.inoreader.com/reader/api/0/mark-all-as-read" + params = {"s": stream_id, "ts": timestamp} + resp = await Inoreader.get_api_info(url=request_url, params=params) + logger.debug(resp.text) + + @staticmethod + async def get_api_item_data( + stream_type: str = "broadcast", + tag: str = None, + feed: str = None, + params: dict = None, + ) -> Optional[dict | list]: + stream_id = Inoreader.get_stream_id(stream_type=stream_type, tag=tag, feed=feed) + request_url = INOREADER_CONTENT_URL + stream_id + default_params = { + "comments": 1, + "n": 10, + "r": "o", + "xt": "user/-/state/com.google/read", + } + if params: + default_params.update(params) + params = default_params + resp = await Inoreader.get_api_info(url=request_url, params=params) + logger.debug(resp.text) + data = resp.json() + data = await Inoreader.process_items_data(data) + return data + + @staticmethod + async def process_items_data(data: dict) -> Optional[dict | list]: + expression = """ + items[].{ + "aurl": canonical[0].href, + "title": title, + "author": origin.title, + "author_url": origin.htmlUrl, + "content": summary.content, + "category": categories[-1], + "message": comments[0].commentBody, + "timestamp": updated + } + """ + data = jmespath.search(expression, data) + for item in data: + item["category"] = item["category"].split("/")[-1] + return data + + @staticmethod + async def get_api_info( + url: str, + params=None, + ) -> Response: + async with httpx.AsyncClient() as client: + resp = await client.post( + INOREADER_LOGIN_URL, + params={ + "Email": INOREADER_EMAIL, + 
"Passwd": INOREADER_PASSWORD, + }, + ) + authorization = resp.text.split("\n")[2].split("=")[1] + + async with httpx.AsyncClient() as client: + headers = HEADERS + headers["Authorization"] = f"GoogleLogin auth={authorization}" + params = params or {} + params.update( + { + "AppId": INOREADER_APP_ID, + "AppKey": INOREADER_APP_KEY, + } + ) + resp = await client.get( + url=url, + params=params, + headers=headers, + ) + return resp diff --git a/apps/api/src/services/inoreader/process.py b/apps/api/src/services/inoreader/process.py new file mode 100644 index 0000000..7fc16e3 --- /dev/null +++ b/apps/api/src/services/inoreader/process.py @@ -0,0 +1,108 @@ +from typing import Union, Optional, Dict, Callable, Awaitable + +import httpx + +from src.config import TELEGRAM_BOT_CALLBACK_URL +from fastfetchbot_shared.models.url_metadata import UrlMetadata +from src.services.inoreader import Inoreader +from src.services.scrapers.common import InfoExtractService +from fastfetchbot_shared.utils.logger import logger +from fastfetchbot_shared.utils.parse import get_url_metadata, get_bool + +default_telegram_channel_id = None + +# Type alias for the message callback +MessageCallback = Callable[[dict, Union[int, str]], Awaitable[None]] + + +async def _default_message_callback(metadata_item: dict, chat_id: Union[int, str]) -> None: + """Default callback that sends via HTTP to the Telegram bot service.""" + async with httpx.AsyncClient() as client: + await client.post( + f"{TELEGRAM_BOT_CALLBACK_URL}/send_message", + json={"data": metadata_item, "chat_id": str(chat_id)}, + timeout=120, + ) + + +async def process_inoreader_data( + data: list, + use_inoreader_content: bool, + telegram_channel_id: Union[int, str] = default_telegram_channel_id, + stream_id: str = None, + message_callback: MessageCallback = None, +): + if message_callback is None: + message_callback = _default_message_callback + + for item in data: + url_type_item = await get_url_metadata(item["aurl"]) + url_type_dict = 
url_type_item.to_dict() + logger.debug(f"ino original: {use_inoreader_content}") + if ( + use_inoreader_content is True + or url_type_dict["content_type"] == "unknown" + ): + is_video = url_type_dict["content_type"] == "video" + content_type = url_type_dict["content_type"] if is_video else "social_media" + source = url_type_dict["source"] if is_video else "inoreader" + url_metadata = UrlMetadata( + url=item["aurl"], + content_type=content_type, + source=source, + ) + metadata_item = InfoExtractService( + url_metadata=url_metadata, + data=item, + store_document=True, + category=item["category"], + ) + else: + metadata_item = InfoExtractService( + url_metadata=url_type_item, + data=item, + store_document=True, + ) + message_metadata_item = await metadata_item.get_item() + await message_callback(message_metadata_item, telegram_channel_id) + if stream_id: + await Inoreader.mark_all_as_read( + stream_id=stream_id, timestamp=item["timestamp"] - 1 + ) + + +async def get_inoreader_item_async( + data: Optional[Dict] = None, + trigger: bool = False, + params: Optional[Dict] = None, + message_callback: MessageCallback = None, +) -> None: + stream_id = None + use_inoreader_content = True + telegram_channel_id = default_telegram_channel_id + if trigger and params and not data: + logger.debug(f"params:{params}") + use_inoreader_content = get_bool(params.get("useInoreaderContent"), True) + stream_type = params.get("streamType", "broadcast") + telegram_channel_id = params.get("channelId", default_telegram_channel_id) + tag = params.get("tag", None) + feed = params.get("feed", None) + the_remaining_params = { + k: v + for k, v in params.items() + if k not in ["streamType", "channelId", "tag", "feed"] + } + data = await Inoreader.get_api_item_data( + stream_type=stream_type, tag=tag, params=the_remaining_params, feed=feed + ) + if not data: + return + stream_id = Inoreader.get_stream_id(stream_type=stream_type, tag=tag, feed=feed) + if type(data) is dict: + data = [data] + await 
process_inoreader_data( + data, use_inoreader_content, telegram_channel_id, stream_id, + message_callback=message_callback, + ) + if stream_id: + await Inoreader.mark_all_as_read(stream_id=stream_id) diff --git a/apps/api/src/services/scrapers/__init__.py b/apps/api/src/services/scrapers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/apps/api/src/services/scrapers/bluesky/__init__.py b/apps/api/src/services/scrapers/bluesky/__init__.py new file mode 100644 index 0000000..274d049 --- /dev/null +++ b/apps/api/src/services/scrapers/bluesky/__init__.py @@ -0,0 +1,45 @@ +import traceback +from dataclasses import dataclass +from urllib.parse import urlparse +from typing import Dict, Optional, Any + +import httpx +import jmespath + +from fastfetchbot_shared.models.metadata_item import MetadataItem, MediaFile, MessageType +from fastfetchbot_shared.utils.parse import get_html_text_length, wrap_text_into_html + + +@dataclass +class Bluesky(MetadataItem): + cid: str = "" + author_did: str = "" + retweet_post: Optional["Bluesky"] = None + + @staticmethod + def from_dict(obj: Any) -> "Bluesky": + bluesky_item = MetadataItem.from_dict(obj) + bluesky_item.cid = obj.get("cid") + bluesky_item.author_did = obj.get("author_did") + return Bluesky( + url=bluesky_item.url, + title=bluesky_item.title, + author=bluesky_item.author, + author_url=bluesky_item.author_url, + telegraph_url=bluesky_item.telegraph_url, + text=bluesky_item.text, + content=bluesky_item.content, + media_files=bluesky_item.media_files, + category=bluesky_item.category, + message_type=bluesky_item.message_type, + cid=bluesky_item.cid, + author_did=bluesky_item.author_did, + ) + + def to_dict(self) -> dict: + result: dict = super().to_dict() + result["cid"] = self.cid + result["author_did"] = self.author_did + if self.retweet_post: + result["retweet_post"] = self.retweet_post.to_dict() + return result diff --git a/apps/api/src/services/scrapers/bluesky/config.py 
b/apps/api/src/services/scrapers/bluesky/config.py new file mode 100644 index 0000000..3183639 --- /dev/null +++ b/apps/api/src/services/scrapers/bluesky/config.py @@ -0,0 +1,3 @@ +BLUESKY_HOST = "https://bsky.app" + +BLUESKY_MAX_LENGTH = 800 diff --git a/apps/api/src/services/scrapers/bluesky/scraper.py b/apps/api/src/services/scrapers/bluesky/scraper.py new file mode 100644 index 0000000..fd3799a --- /dev/null +++ b/apps/api/src/services/scrapers/bluesky/scraper.py @@ -0,0 +1,191 @@ +from typing import Optional +from urllib.parse import urlparse + +from atproto import AsyncClient, IdResolver, AtUri +from atproto_client.models.app.bsky.embed.record import ViewRecord +from atproto_client.models.app.bsky.feed.defs import ThreadViewPost, PostView + +from src.config import JINJA2_ENV +from fastfetchbot_shared.models.metadata_item import MediaFile, MessageType +from src.services.scrapers.scraper import Scraper, DataProcessor +from src.services.scrapers.bluesky import Bluesky +from src.services.scrapers.bluesky.config import BLUESKY_HOST, BLUESKY_MAX_LENGTH +from fastfetchbot_shared.utils.logger import logger +from fastfetchbot_shared.utils.parse import wrap_text_into_html + +telegram_text_template = JINJA2_ENV.get_template("bluesky_telegram_text.jinja2") +content_template = JINJA2_ENV.get_template("bluesky_content.jinja2") + + +class BlueskyPost: + def __init__(self, bluesky_url: str): + self.url: str = bluesky_url + bluesky_url_parser = urlparse(bluesky_url) + self.bluesky_host: Optional[str] = bluesky_url_parser.netloc + bluesky_path = bluesky_url_parser.path + self.handle: Optional[str] = bluesky_path.split("/")[2] + self.post_rkey: Optional[str] = bluesky_path.split("/")[-1] + self.did: str = BlueskyScraper.id_resolver.handle.resolve(self.handle) + + +class BlueskyDataProcessor(DataProcessor): + + def __init__(self, url: str, bluesky_thread_data: ThreadViewPost): + self.url: str = url + self.bluesky_thread_data: ThreadViewPost = bluesky_thread_data + logger.debug( 
+ f"BlueskyDataProcessor initialized with url: {url}\n and bluesky_thread_data: \n{bluesky_thread_data}") + self._data: dict = {} + + async def get_item(self) -> dict: + await self.process_data() + bluesky_item = Bluesky.from_dict(self._data) + return bluesky_item.to_dict() + pass + + async def process_data(self): + await self._resolve_thread_data() + + async def _resolve_thread_data(self) -> None: + base_post_view_data = await BlueskyDataProcessor._resolve_single_post_data(self.bluesky_thread_data.post) + base_post_view_data["url"] = self.url + + post_author_did = base_post_view_data["author_did"] + + parent_posts_text = "" + parent_posts_content = "" + parent_posts_media_files = [] + replies_posts_text = "" + replies_posts_content = "" + replies_posts_media_files = [] + # get post data from the parent posts whose author is the same as the base post author + if self.bluesky_thread_data.parent: + parent_posts_data = [] + parent_post_view = self.bluesky_thread_data.parent + await BlueskyDataProcessor._get_parent_posts_data(parent_post_view, parent_posts_data) + if parent_posts_data: + for post_data in parent_posts_data: + parent_posts_text += "\n" + post_data["text"] + parent_posts_content += post_data["content"] + parent_posts_media_files.extend(post_data["media_files"]) + # get post data from the replies whose author is the same as the base post author + if self.bluesky_thread_data.replies: + replies_posts_data = [] + for post_thread_view in self.bluesky_thread_data.replies: + post_view = post_thread_view.post + if post_author_did == post_view.author.did: + post_data = await BlueskyDataProcessor._resolve_single_post_data(post_view) + replies_posts_data.append(post_data) + if replies_posts_data: + for post_data in replies_posts_data: + replies_posts_text += "\n" + post_data["text"] + replies_posts_content += post_data["content"] + replies_posts_media_files.extend(post_data["media_files"]) + base_post_view_data["text"] = parent_posts_text + 
base_post_view_data["text"] + replies_posts_text + base_post_view_data["content"] = parent_posts_content + base_post_view_data["content"] + replies_posts_content + base_post_view_data["media_files"] = parent_posts_media_files + base_post_view_data[ + "media_files"] + replies_posts_media_files + + if len(base_post_view_data["text"]) > BLUESKY_MAX_LENGTH: + base_post_view_data["message_type"] = MessageType.LONG + else: + base_post_view_data["message_type"] = MessageType.SHORT + + self._data = base_post_view_data + + @staticmethod + async def _get_parent_posts_data(parent_post_view: ThreadViewPost, parent_posts_data_list: list) -> None: + parent_post_data = await BlueskyDataProcessor._resolve_single_post_data(parent_post_view.post) + parent_posts_data_list.append(parent_post_data) + if parent_post_view.parent: + await BlueskyDataProcessor._get_parent_posts_data(parent_post_view.parent, parent_posts_data_list) + + @staticmethod + async def _resolve_single_post_data(post_data: PostView) -> dict: + at_uri = AtUri.from_str(post_data.uri) + url = BLUESKY_HOST + "/profile/" + post_data.author.handle + "/post/" + at_uri.rkey + author = post_data.author.display_name + author_url = BLUESKY_HOST + "/profile/" + post_data.author.handle + author_did = post_data.author.did + text = post_data.record.text + created_at = post_data.record.created_at + + parsed_post_data = { + "url": url, + "title": author + "\'s Bluesky post", + "author": author, + "author_url": author_url, + "text": text, + "category": "bluesky", + "media_files": [], + "created_at": created_at, + "author_did": author_did, + } + + media_files = [] + if post_data.embed is not None: + # images and videos + if "images" in post_data.embed.__dict__: + for image in post_data.embed.images: + img_url = image.fullsize + img_item = { + "media_type": "image", + "url": img_url, + "caption": "", + } + media_files.append(img_item) + # TODO: handle video, which is in m3u8 format that needs to be downloaded and converted to mp4 + 
parsed_post_data["media_files"] = media_files + # retweet post + if "record" in post_data.embed.__dict__ and post_data.embed.record is ViewRecord: + retweet_post_data = await BlueskyDataProcessor._resolve_single_post_data(post_data.embed.record) + parsed_post_data["retweet_post"] = retweet_post_data + + content = await BlueskyDataProcessor._generate_html_content(parsed_post_data) + text = await BlueskyDataProcessor._generate_telegram_text(parsed_post_data) + parsed_post_data["content"] = content + parsed_post_data["text"] = text + + return parsed_post_data + + @staticmethod + async def _generate_html_content(data: dict) -> str: + html_content_text = wrap_text_into_html(data["text"]) + data["html_content_text"] = html_content_text + content = content_template.render(data=data) + return content + + @staticmethod + async def _generate_telegram_text(data: dict) -> str: + text = telegram_text_template.render(data=data) + return text + + +class BlueskyScraper(Scraper): + id_resolver = IdResolver() + + def __init__(self, username: Optional[str] = None, password: Optional[str] = None): + self.client: AsyncClient = AsyncClient() + self.username: Optional[str] = username + self.password: Optional[str] = password + self.did: Optional[str] = None + + async def init(self): + if self.username and self.password: + await self.client.login(self.username, self.password) + # self.did = await self.client.com + + async def get_processor_by_url(self, url: str) -> BlueskyDataProcessor: + bluesky_post = BlueskyPost(url) + bluesky_post_data = await self._request_post_data(bluesky_post) + return BlueskyDataProcessor(url, bluesky_post_data) + + async def _request_post_data(self, bluesky_post: BlueskyPost) -> ThreadViewPost: + profile_identify = bluesky_post.did or bluesky_post.handle + try: + post_data = await self.client.get_post(profile_identify=profile_identify, post_rkey=bluesky_post.post_rkey) + post_uri = post_data.uri + post_thread_data = await 
self.client.get_post_thread(uri=post_uri) + return post_thread_data.thread + except Exception as e: + logger.error(f"Error while getting post data: {e}") diff --git a/apps/api/src/services/scrapers/common.py b/apps/api/src/services/scrapers/common.py new file mode 100644 index 0000000..d7b83cf --- /dev/null +++ b/apps/api/src/services/scrapers/common.py @@ -0,0 +1,114 @@ +from typing import Optional, Any + +from src.models.database_model import Metadata +from fastfetchbot_shared.models.url_metadata import UrlMetadata +from fastfetchbot_shared.models.metadata_item import MessageType +from src.services import ( + telegraph, + inoreader +) +from src.services.file_export import video_download, document_export +from src.services.scrapers import twitter, wechat, reddit, weibo, zhihu, douban, instagram, xiaohongshu, threads +from src.services.scrapers.scraper_manager import ScraperManager +from src.database import save_instances +from fastfetchbot_shared.utils.logger import logger +from src.config import DATABASE_ON + + +class InfoExtractService(object): + service_classes: dict = { + "twitter": twitter.Twitter, + "threads": threads.Threads, + "reddit": reddit.Reddit, + "weibo": weibo.Weibo, + "wechat": wechat.Wechat, + "instagram": instagram.Instagram, + "douban": douban.Douban, + "zhihu": zhihu.Zhihu, + "xiaohongshu": xiaohongshu.Xiaohongshu, + "youtube": video_download.VideoDownloader, + "bilibili": video_download.VideoDownloader, + "inoreader": inoreader.Inoreader, + } + + def __init__( + self, + url_metadata: UrlMetadata, + data: Any = None, + store_database: Optional[bool] = DATABASE_ON, + store_telegraph: Optional[bool] = True, + store_document: Optional[bool] = False, + **kwargs, + ): + url_metadata = url_metadata.to_dict() + self.url = url_metadata["url"] + self.content_type = url_metadata["content_type"] + self.source = url_metadata["source"] + self.data = data + self.kwargs = kwargs + self.store_database = store_database + self.store_telegraph = store_telegraph 
+ self.store_document = store_document + + @property + def category(self) -> str: + return self.source + + async def get_item(self, metadata_item: Optional[dict] = None) -> dict: + if self.content_type == "video": + if not self.kwargs.get("category"): + self.kwargs["category"] = self.category + if not metadata_item: + try: + if self.category in ["bluesky", "weibo", "other", "unknown"]: # it is a workaround before the code refactor + await ScraperManager.init_scraper(self.category) + item_data_processor = await ScraperManager.scrapers[self.category].get_processor_by_url(url=self.url) + metadata_item = await item_data_processor.get_item() + else: + scraper_item = InfoExtractService.service_classes[self.category]( + url=self.url, data=self.data, **self.kwargs + ) + metadata_item = await scraper_item.get_item() + except Exception as e: + logger.error(f"Error while getting item: {e}") + raise e + logger.info(f"Got metadata item") + logger.debug(metadata_item) + metadata_item = await self.process_item(metadata_item) + return metadata_item + + async def process_item(self, metadata_item: dict) -> dict: + if metadata_item.get("message_type") == MessageType.LONG: + self.store_telegraph = True + logger.info("message type is long, store in telegraph") + if self.store_telegraph: + telegraph_item = telegraph.Telegraph.from_dict(metadata_item) + try: + telegraph_url = await telegraph_item.get_telegraph() + except Exception as e: + logger.error(f"Error while getting telegraph: {e}") + telegraph_url = "" + metadata_item["telegraph_url"] = telegraph_url + if self.store_document or ( + not self.store_document and metadata_item["telegraph_url"] == "" + ): + logger.info("store in document") + try: + pdf_document = document_export.pdf_export.PdfExport( + title=metadata_item["title"], html_string=metadata_item["content"] + ) + output_filename = await pdf_document.export(method="file") + metadata_item["media_files"].append( + { + "media_type": "document", + "url": output_filename, + 
"caption": "", + } + ) + except Exception as e: + logger.error(f"Error while exporting document: {e}") + metadata_item["title"] = metadata_item["title"].strip() + if self.store_database: + logger.info("store in database") + await save_instances(Metadata.model_construct(**metadata_item)) + return metadata_item diff --git a/apps/api/src/services/scrapers/douban/__init__.py b/apps/api/src/services/scrapers/douban/__init__.py new file mode 100644 index 0000000..4ea8712 --- /dev/null +++ b/apps/api/src/services/scrapers/douban/__init__.py @@ -0,0 +1,230 @@ +import re +from typing import Dict, Optional, Any +from enum import Enum +from urllib.parse import urlparse + +from bs4 import BeautifulSoup +from lxml import etree + +from fastfetchbot_shared.utils.parse import get_html_text_length, wrap_text_into_html +from fastfetchbot_shared.utils.network import get_selector, HEADERS +from fastfetchbot_shared.models.metadata_item import MetadataItem, MediaFile, MessageType +from src.config import JINJA2_ENV + +SHORT_LIMIT = 600 + +short_text_template = JINJA2_ENV.get_template("douban_short_text.jinja2") +content_template = JINJA2_ENV.get_template("douban_content.jinja2") + + +class DoubanType(str, Enum): + MOVIE_REVIEW = "movie_review" + BOOK_REVIEW = "book_review" + NOTE = "note" + STATUS = "status" + GROUP = "group" + UNKNOWN = "unknown" + + +class Douban(MetadataItem): + item_title: Optional[str] + item_url: Optional[str] + group_name: Optional[str] + group_url: Optional[str] + douban_type: DoubanType + text_group: Optional[str] + raw_content: Optional[str] + date: Optional[str] + + def __init__(self, url: str, data: Optional[Any] = None, **kwargs): + # metadata fields + self.url = url + self.title = "" + self.author = "" + self.author_url = "" + self.text = "" + self.content = "" + self.media_files = [] + self.category = "douban" + self.message_type = MessageType.SHORT + # auxiliary fields + self.item_title: Optional[str] = None + self.item_url: Optional[str] = None + 
self.group_name: Optional[str] = None + self.group_url: Optional[str] = None + self.douban_type: DoubanType = DoubanType.UNKNOWN + self.text_group: Optional[str] = None + self.raw_content: Optional[str] = None + self.date: Optional[str] = None + # reqeust fields + self.headers = HEADERS + self.headers["Cookie"] = kwargs.get("cookie", "") + + async def get_item(self) -> dict: + await self.get_douban() + return self.to_dict() + + async def get_douban(self) -> None: + self.check_douban_type() + await self.get_douban_item() + + def check_douban_type(self): + urlparser = urlparse(self.url) + host = urlparser.netloc + path = urlparser.path + if host.find("m.douban") != -1: # parse the m.douban url + host = host.replace("m.douban", "douban") + if path.startswith("/movie/review"): + self.douban_type = DoubanType.MOVIE_REVIEW + host = host.replace("douban", "movie.douban") + path = path.replace("/movie/", "/") + elif path.startswith("/book/review"): + self.douban_type = DoubanType.BOOK_REVIEW + host = host.replace("douban", "book.douban") + path = path.replace("/book/", "/") + if path.startswith("/note/"): + self.douban_type = DoubanType.NOTE + elif path.startswith("/status/") or re.match(r"/people/\d+/status/\d+", path): + self.douban_type = DoubanType.STATUS + elif path.startswith("/group/topic/"): + self.douban_type = DoubanType.GROUP + elif host.startswith("movie.douban") and path.startswith("/review/"): + self.douban_type = DoubanType.MOVIE_REVIEW + elif host.startswith("book.douban") and path.startswith("/review/"): + self.douban_type = DoubanType.BOOK_REVIEW + else: + self.douban_type = DoubanType.UNKNOWN + self.url = f"https://{host}{path}" + + async def get_douban_item(self): + function_dict = { + DoubanType.MOVIE_REVIEW: self._get_douban_movie_review, + DoubanType.BOOK_REVIEW: self._get_douban_book_review, + DoubanType.NOTE: self._get_douban_note, + DoubanType.STATUS: self._get_douban_status, + DoubanType.GROUP: self._get_douban_group_article, + 
DoubanType.UNKNOWN: None, + } + await function_dict[self.douban_type]() + short_text = self._douban_short_text_process() + if short_text.endswith("\n"): + short_text = short_text[:-1] + data = self.__dict__ + data["short_text"] = short_text + self.text = short_text_template.render(data=data) + self.raw_content = self.raw_content_to_html(self.raw_content) + self.content = wrap_text_into_html( + content_template.render(data=data), is_html=True + ) + if get_html_text_length(self.content) > SHORT_LIMIT: + self.message_type = MessageType.LONG + else: + self.message_type = MessageType.SHORT + + async def _get_douban_movie_review(self): + selector = await get_selector(url=self.url, headers=self.headers) + self.title = selector.xpath('string(//div[@id="content"]//h1//span)') + self.author = selector.xpath('string(//header[@class="main-hd"]//span)') + self.author_url = selector.xpath('string(//header[@class="main-hd"]/a/@href)') + self.item_title = selector.xpath('string(//header[@class="main-hd"]/a[2])') + self.item_url = selector.xpath('string(//header[@class="main-hd"]/a[2]/@href)') + self.raw_content = str( + etree.tostring( + selector.xpath("//div[contains(@class,'review-content')]")[0], + encoding="utf-8", + ), + encoding="utf-8", + ) + + async def _get_douban_book_review(self): + selector = await get_selector(self.url, headers=self.headers) + self.title = selector.xpath('string(//div[@id="content"]//h1//span)') + self.author = selector.xpath('string(//header[@class="main-hd"]//span)') + self.author_url = selector.xpath('string(//header[@class="main-hd"]/a/@href)') + self.item_title = selector.xpath('string(//header[@class="main-hd"]/a[2])') + self.item_url = selector.xpath('string(//header[@class="main-hd"]/a[2]/@href)') + self.raw_content = str( + etree.tostring( + selector.xpath('//div[@id="link-report"]')[0], encoding="utf-8" + ), + encoding="utf-8", + ) + + async def _get_douban_note(self): + selector = await get_selector(self.url, headers=self.headers) + 
self.title = selector.xpath("string(//h1)") + self.author = selector.xpath('string(//div[@class="content"]/a)') + self.author_url = selector.xpath('string(//div[@class="content"]/a/@href)') + self.raw_content = str( + etree.tostring( + selector.xpath('//div[@id="link-report"]')[0], encoding="utf-8" + ), + encoding="utf-8", + ) + + async def _get_douban_status(self): + selector = await get_selector(self.url, headers=self.headers) + self.author = selector.xpath('string(//div[@class="content"]/a)') + self.author_url = selector.xpath('string(//div[@class="content"]/a/@href)') + self.title = self.author + "的广播" + self.raw_content = ( + str( + etree.tostring( + selector.xpath('//div[@class="status-saying"]')[0], encoding="utf-8" + ), + encoding="utf-8", + ) + .replace("
    ", "") + .replace("
    ", "") + .replace(">+<", "><") + .replace(" ", "
    ") + ) + + async def _get_douban_group_article(self): + selector = await get_selector(self.url, headers=self.headers) + self.title = selector.xpath('string(//div[@id="content"]//h1)') + self.title = self.title.replace("\n", "").strip() + self.author = selector.xpath('string(//span[@class="from"]//a)') + self.author_url = selector.xpath('string(//span[@class="from"]//a/@href)') + self.group_name = selector.xpath( + 'string(//div[@id="g-side-info"]//div[@class="title"]/a)' + ) + self.group_url = selector.xpath( + 'string(//div[@id="g-side-info"]//div[@class="title"]/a/@href)' + ) + self.raw_content = str( + etree.tostring( + selector.xpath('//div[@id="link-report"]')[0], encoding="utf-8" + ), + encoding="utf-8", + ) + + def _douban_short_text_process(self) -> str: + soup = BeautifulSoup(self.raw_content, "html.parser") + for img in soup.find_all("img"): + media_item = {"media_type": "image", "url": img["src"], "caption": ""} + self.media_files.append(MediaFile.from_dict(media_item)) + img.extract() + for item in soup.find_all(["p", "span", "div"]): + item.unwrap() + for item in soup.find_all(["link", "script"]): + item.decompose() + for item in soup.find_all("a"): + if item.get("title") == "查看原图": + item.decompose() + short_text = str(soup) + short_text = re.sub(r"\n{2,}", "\n", short_text) + short_text = re.sub(r"", "\n", short_text) + return short_text + + @staticmethod + def raw_content_to_html(raw_content: str) -> str: + # Split the text into paragraphs based on double newlines + print(raw_content) + paragraphs = raw_content.split('
    \n') + # Wrap each paragraph with

    tags + print(paragraphs) + html_paragraphs = [f'

    {paragraph.strip()}

    ' for paragraph in paragraphs] + # Join the paragraphs to form the final HTML string + html_string = ''.join(html_paragraphs) + return html_string diff --git a/apps/api/src/services/scrapers/general/__init__.py b/apps/api/src/services/scrapers/general/__init__.py new file mode 100644 index 0000000..f256512 --- /dev/null +++ b/apps/api/src/services/scrapers/general/__init__.py @@ -0,0 +1,40 @@ +from dataclasses import dataclass +from typing import Any + +from fastfetchbot_shared.models.metadata_item import MetadataItem + + +@dataclass +class GeneralItem(MetadataItem): + """ + GeneralItem: Data class for scraped content from general webpage scrapers. + """ + id: str = "" + raw_content: str = "" + scraper_type: str = "" # Which scraper was used (e.g., "firecrawl", "zyte", etc.) + + @staticmethod + def from_dict(obj: Any) -> "GeneralItem": + metadata_item = MetadataItem.from_dict(obj) + return GeneralItem( + url=metadata_item.url, + title=metadata_item.title, + author=metadata_item.author, + author_url=metadata_item.author_url, + telegraph_url=metadata_item.telegraph_url, + text=metadata_item.text, + content=metadata_item.content, + media_files=metadata_item.media_files, + category=metadata_item.category, + message_type=metadata_item.message_type, + id=obj.get("id", ""), + raw_content=obj.get("raw_content", ""), + scraper_type=obj.get("scraper_type", ""), + ) + + def to_dict(self) -> dict: + result: dict = super().to_dict() + result["id"] = self.id + result["raw_content"] = self.raw_content + result["scraper_type"] = self.scraper_type + return result diff --git a/apps/api/src/services/scrapers/general/base.py b/apps/api/src/services/scrapers/general/base.py new file mode 100644 index 0000000..8d454d6 --- /dev/null +++ b/apps/api/src/services/scrapers/general/base.py @@ -0,0 +1,208 @@ +import hashlib +from abc import abstractmethod +from typing import Optional +from urllib.parse import urlparse + +from bs4 import BeautifulSoup, Doctype +from openai import 
AsyncOpenAI +from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam + +from src.config import OPENAI_API_KEY +from fastfetchbot_shared.models.metadata_item import MediaFile, MessageType +from src.services.scrapers.scraper import Scraper, DataProcessor +from src.services.scrapers.general import GeneralItem +from fastfetchbot_shared.utils.parse import get_html_text_length, wrap_text_into_html +from fastfetchbot_shared.utils.logger import logger + +GENERAL_TEXT_LIMIT = 800 + +DEFAULT_OPENAI_MODEL = "gpt-5-nano" + +# System prompt for LLM to extract article content +ARTICLE_EXTRACTION_PROMPT = """You are an expert content extractor. Your task is to extract the main article content from the provided HTML. + +Instructions: +1. Identify and extract ONLY the main article/post content +2. Remove navigation, headers, footers, sidebars, ads, comments, and other non-article elements +3. Preserve the article's structure (headings, paragraphs, lists, etc.) +4. Keep important formatting like bold, italic, links, and images +5. Return clean HTML containing only the article content +6. If you cannot identify the main content, return the original HTML unchanged +7. After all of the above, remove some basic HTML tags like , ,