From a100dacf3744b0930dfe79b1646a0c89ec416629 Mon Sep 17 00:00:00 2001
From: aturret <enturreopy@gmail.com>
Date: Mon, 23 Feb 2026 16:56:47 -0600
Subject: [PATCH 1/8] Update .gitignore

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 09cf776..5f5986c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -272,3 +272,4 @@ conf/*
 .DS_Store
 /.claude/
 /apps/worker/conf/
+apps/worker/celerybeat-schedule.db

From ee2d78bab0f874a278389bb6ac091001fd693f3b Mon Sep 17 00:00:00 2001
From: aturret <enturreopy@gmail.com>
Date: Mon, 23 Feb 2026 16:56:52 -0600
Subject: [PATCH 2/8] Update .gitignore

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 5f5986c..72992e5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -273,3 +273,4 @@ conf/*
 /.claude/
 /apps/worker/conf/
 apps/worker/celerybeat-schedule.db
+apps/worker/celerybeat-schedule.db

From 9d2d1e3449fc439713318270988fca3878134421 Mon Sep 17 00:00:00 2001
From: aturret <enturreopy@gmail.com>
Date: Mon, 23 Feb 2026 16:57:33 -0600
Subject: [PATCH 3/8] feat: revive xhs scraping

---
 apps/api/src/config.py                        |  26 +
 .../services/scrapers/xiaohongshu/__init__.py | 123 ++---
 .../services/scrapers/xiaohongshu/adaptar.py  | 517 ++++++++++++++++++
 .../src/templates/xiaohongshu_content.jinja2  |   4 +-
 packages/shared/fastfetchbot_shared/config.py |   4 +
 .../fastfetchbot_shared/utils/config.py       |   3 +-
 template.env                                  |   8 +
 7 files changed, 591 insertions(+), 94 deletions(-)
 create mode 100644 apps/api/src/services/scrapers/xiaohongshu/adaptar.py

diff --git a/apps/api/src/config.py b/apps/api/src/config.py
index e02c3af..dfad573 100644
--- a/apps/api/src/config.py
+++ b/apps/api/src/config.py
@@ -89,6 +89,32 @@
 XHS_ENABLE_IP_PROXY = get_env_bool(env, "XHS_ENABLE_IP_PROXY", False)
 XHS_SAVE_LOGIN_STATE = get_env_bool(env, "XHS_SAVE_LOGIN_STATE", True)
 
+# XHS sign server and cookie file
+from fastfetchbot_shared.config import SIGN_SERVER_URL as XHS_SIGN_SERVER_URL
+from fastfetchbot_shared.config import XHS_COOKIE_PATH as _XHS_COOKIE_PATH
+
+xhs_cookie_path = _XHS_COOKIE_PATH or os.path.join(conf_dir, "xhs_cookies.txt")
+
+# Load XHS cookies from file (similar to Zhihu cookie loading)
+XHS_COOKIE_STRING = ""
+if os.path.exists(xhs_cookie_path):
+    try:
+        with open(xhs_cookie_path, "r", encoding="utf-8") as f:
+            XHS_COOKIE_STRING = f.read().strip()
+    except (IOError, OSError) as e:
+        print(f"Error reading XHS cookie file: {e}")
+        XHS_COOKIE_STRING = ""
+else:
+    # Fallback: build cookie string from individual env vars (backward compat)
+    cookie_parts = []
+    if XIAOHONGSHU_A1:
+        cookie_parts.append(f"a1={XIAOHONGSHU_A1}")
+    if XIAOHONGSHU_WEBID:
+        cookie_parts.append(f"web_id={XIAOHONGSHU_WEBID}")
+    if XIAOHONGSHU_WEBSESSION:
+        cookie_parts.append(f"web_session={XIAOHONGSHU_WEBSESSION}")
+    XHS_COOKIE_STRING = "; ".join(cookie_parts)
+
 # Zhihu
 FXZHIHU_HOST = env.get("FXZHIHU_HOST", "fxzhihu.com")
 
diff --git a/apps/api/src/services/scrapers/xiaohongshu/__init__.py b/apps/api/src/services/scrapers/xiaohongshu/__init__.py
index 6225a35..bd66cea 100644
--- a/apps/api/src/services/scrapers/xiaohongshu/__init__.py
+++ b/apps/api/src/services/scrapers/xiaohongshu/__init__.py
@@ -1,23 +1,14 @@
-import asyncio
 from typing import Any
-from urllib.parse import urlparse
-
-import httpx
-import jmespath
 
 from fastfetchbot_shared.models.metadata_item import MetadataItem, MediaFile, MessageType
-from fastfetchbot_shared.utils.network import HEADERS
-from src.config import JINJA2_ENV, HTTP_REQUEST_TIMEOUT
-from .xhs.core import XiaoHongShuCrawler
-from .xhs.client import XHSClient
-from .xhs import proxy_account_pool
-
 from fastfetchbot_shared.utils.logger import logger
 from fastfetchbot_shared.utils.parse import (
     unix_timestamp_to_utc,
     get_html_text_length,
     wrap_text_into_html,
 )
+from src.config import JINJA2_ENV, XHS_COOKIE_STRING, XHS_SIGN_SERVER_URL
+from .adaptar import XhsSinglePostAdapter
 
 environment = JINJA2_ENV
 short_text_template = environment.get_template("xiaohongshu_short_text.jinja2")
@@ -42,78 +33,51 @@ def __init__(self, url: str, data: Any, **kwargs):
         self.raw_content = None
 
     async def get_item(self) -> dict:
-        await self.get_xiaohongshu()
+        await self._get_xiaohongshu()
         return self.to_dict()
 
-    async def get_xiaohongshu(self) -> None:
-        if self.url.find("xiaohongshu.com") == -1:
-            async with httpx.AsyncClient() as client:
-                resp = await client.get(
-                    self.url,
-                    headers=HEADERS,
-                    follow_redirects=True,
-                    timeout=HTTP_REQUEST_TIMEOUT,
-                )
-                if (
-                    resp.history
-                ):  # if there is a redirect, the request will have a response chain
-                    for h in resp.history:
-                        print(h.status_code, h.url)
-                    self.url = str(resp.url)
-        urlparser = urlparse(self.url)
-        self.id = urlparser.path.split("/")[-1]
-        crawler = XiaoHongShuCrawler()
-        account_pool = proxy_account_pool.create_account_pool()
-        crawler.init_config("xhs", "cookie", account_pool)
-        note_detail = None
-        for _ in range(5):
-            try:
-                note_detail = await crawler.start(id=self.id)
-                break
-            except Exception as e:
-                await asyncio.sleep(3)
-                logger.error(f"error: {e}")
-                logger.error(f"retrying...")
-        if not note_detail:
-            raise Exception("重试了这么多次还是无法签名成功，寄寄寄")
-        # logger.debug(f"json_data: {json.dumps(note_detail, ensure_ascii=False, indent=4)}")
-        parsed_data = self.process_note_json(note_detail)
-        await self.process_xiaohongshu_note(parsed_data)
+    async def _get_xiaohongshu(self) -> None:
+        async with XhsSinglePostAdapter(
+            cookies=XHS_COOKIE_STRING,
+            sign_server_endpoint=XHS_SIGN_SERVER_URL,
+        ) as adapter:
+            result = await adapter.fetch_post(note_url=self.url)
+        note = result["note"]
+        self.id = note.get("note_id")
+        self.url = result["url"]
+        await self._process_xiaohongshu_note(note)
 
-    async def process_xiaohongshu_note(self, json_data: dict):
+    async def _process_xiaohongshu_note(self, json_data: dict):
+        user = json_data.get("user", {}) or {}
         self.title = json_data.get("title")
-        self.author = json_data.get("author")
+        self.author = user.get("nickname")
         if not self.title and self.author:
             self.title = f"{self.author}的小红书笔记"
-        self.author_url = "https://www.xiaohongshu.com/user/profile/" + json_data.get(
-            "user_id"
+        self.author_url = (
+            "https://www.xiaohongshu.com/user/profile/" + user.get("user_id", "")
         )
-        self.raw_content = json_data.get("raw_content")
-        logger.debug(f"{json_data.get('created')}")
+        self.raw_content = json_data.get("desc", "")
+        raw_time = json_data.get("time", 0)
+        raw_updated = json_data.get("last_update_time", 0)
         self.created = (
-            unix_timestamp_to_utc(json_data.get("created") / 1000)
-            if json_data.get("created")
-            else None
+            unix_timestamp_to_utc(int(raw_time) / 1000) if raw_time else None
         )
         self.updated = (
-            unix_timestamp_to_utc(json_data.get("updated") / 1000)
-            if json_data.get("updated")
-            else None
+            unix_timestamp_to_utc(int(raw_updated) / 1000) if raw_updated else None
         )
-        self.like_count = json_data.get("like_count")
+        self.like_count = json_data.get("liked_count")
         self.collected_count = json_data.get("collected_count")
         self.comment_count = json_data.get("comment_count")
         self.share_count = json_data.get("share_count")
         self.ip_location = json_data.get("ip_location")
-        if json_data.get("image_list"):
-            for image_url in json_data.get("image_list"):
-                self.media_files.append(MediaFile(url=image_url, media_type="image"))
-        if json_data.get("video"):
-            self.media_files.append(
-                MediaFile(url=json_data.get("video"), media_type="video")
-            )
+        for image_url in json_data.get("image_list", []) or []:
+            self.media_files.append(MediaFile(url=image_url, media_type="image"))
+        video_urls = json_data.get("video_urls", []) or []
+        if video_urls:
+            self.media_files.append(MediaFile(url=video_urls[0], media_type="video"))
         data = self.__dict__
-        data["raw_content"] = data["raw_content"].replace("\t", "")
+        raw_content = self.raw_content or ""
+        data["raw_content"] = raw_content.replace("\t", "")
         if data["raw_content"].endswith("\n"):
             data["raw_content"] = data["raw_content"][:-1]
         self.text = short_text_template.render(data=data)
@@ -124,30 +88,7 @@ async def process_xiaohongshu_note(self, json_data: dict):
             if media_file.media_type == "image":
                 data["raw_content"] += f'<p><img src="{media_file.url}" alt=""/></p>'
             elif media_file.media_type == "video":
-                data[
-                    "raw_content"
-                ] += (
+                data["raw_content"] += (
                     f'<p><video src="{media_file.url}" controls="controls"></video></p>'
                 )
         self.content = content_template.render(data=data)
-
-    @staticmethod
-    def process_note_json(json_data: dict):
-        expression = """
-        {
-        title: title,
-        raw_content: desc,
-        author: user.nickname,
-        user_id: user.user_id,
-        image_list: image_list[*].url,
-        video: video.media.stream.h264[0].master_url,
-        like_count: interact_info.liked_count,
-        collected_count: interact_info.collected_count,
-        comment_count: interact_info.comment_count,
-        share_count: interact_info.share_count,
-        ip_location: ip_location,
-        created: time,
-        updated: last_update_time
-        }
-        """
-        return jmespath.search(expression, json_data)
diff --git a/apps/api/src/services/scrapers/xiaohongshu/adaptar.py b/apps/api/src/services/scrapers/xiaohongshu/adaptar.py
new file mode 100644
index 0000000..598d085
--- /dev/null
+++ b/apps/api/src/services/scrapers/xiaohongshu/adaptar.py
@@ -0,0 +1,517 @@
+from __future__ import annotations
+
+import json
+import re
+from typing import Any, Dict, List, Optional
+from urllib.parse import parse_qsl, urlencode, urlparse
+
+import httpx
+
+from fastfetchbot_shared.config import SIGN_SERVER_URL
+from fastfetchbot_shared.utils.logger import logger
+
+XHS_API_URL = "https://edith.xiaohongshu.com"
+XHS_WEB_URL = "https://www.xiaohongshu.com"
+
+
+def parse_xhs_note_url(note_url: str) -> Dict[str, str]:
+    """
+    Parse XHS note URL into note_id/xsec_token/xsec_source.
+    """
+    parsed = urlparse(note_url)
+    query = dict(parse_qsl(parsed.query, keep_blank_values=True))
+    path_parts = [part for part in parsed.path.split("/") if part]
+    if not path_parts:
+        raise ValueError(f"Invalid XHS note URL: {note_url}")
+    note_id = path_parts[-1]
+    if note_id in {"explore", "discovery", "item"}:
+        raise ValueError(f"Invalid XHS note URL path: {note_url}")
+    return {
+        "note_id": note_id,
+        "xsec_token": query.get("xsec_token", ""),
+        "xsec_source": query.get("xsec_source", ""),
+    }
+
+def get_pure_url(url: str) -> str:
+    parsed = urlparse(url)
+    return parsed.scheme + "://" + parsed.netloc + parsed.path
+
+
+class XhsSinglePostAdapter:
+    """Small adapter for fetching one Xiaohongshu post and optional comments."""
+
+    def __init__(
+            self,
+            cookies: str,
+            sign_server_endpoint: str = "",
+            timeout: float = 20.0,
+    ):
+        self.cookies = cookies.strip()
+        self.sign_server_endpoint = (sign_server_endpoint or SIGN_SERVER_URL).rstrip("/")
+        self.timeout = timeout
+        self._http = httpx.AsyncClient(timeout=timeout, follow_redirects=True)
+
+    async def close(self) -> None:
+        await self._http.aclose()
+
+    async def __aenter__(self) -> "XhsSinglePostAdapter":
+        return self
+
+    async def __aexit__(self, exc_type, exc, tb) -> None:  # type: ignore[override]
+        await self.close()
+
+    def _base_headers(self) -> Dict[str, str]:
+        return {
+            "accept": "application/json, text/plain, */*",
+            "accept-language": "zh-CN,zh;q=0.9",
+            "cache-control": "no-cache",
+            "content-type": "application/json;charset=UTF-8",
+            "origin": XHS_WEB_URL,
+            "pragma": "no-cache",
+            "referer": f"{XHS_WEB_URL}/",
+            "sec-ch-ua": '"Chromium";v="136", "Google Chrome";v="136", "Not.A/Brand";v="99"',
+            "sec-ch-ua-mobile": "?0",
+            "sec-ch-ua-platform": '"Windows"',
+            "sec-fetch-dest": "empty",
+            "sec-fetch-mode": "cors",
+            "sec-fetch-site": "same-site",
+            "user-agent": (
+                "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
+                "AppleWebKit/537.36 (KHTML, like Gecko) "
+                "Chrome/137.0.0.0 Safari/537.36"
+            ),
+            "cookie": self.cookies,
+        }
+
+    async def _sign_headers(self, uri: str, data: Optional[Any] = None) -> Dict[str, str]:
+        payload = {"uri": uri, "data": data, "cookies": self.cookies}
+        resp = await self._http.post(
+            f"{self.sign_server_endpoint}/signsrv/v1/xhs/sign",
+            json=payload,
+        )
+        resp.raise_for_status()
+        body = resp.json()
+        if not body.get("isok"):
+            raise RuntimeError(f"XHS sign server returned error: {body}")
+        sign = body.get("data", {}) or {}
+        required = ["x_s", "x_t", "x_s_common", "x_b3_traceid"]
+        missing = [key for key in required if key not in sign]
+        if missing:
+            raise RuntimeError(f"XHS sign response missing fields: {missing}")
+        headers = self._base_headers()
+        headers.update(
+            {
+                "X-s": sign["x_s"],
+                "X-t": sign["x_t"],
+                "x-s-common": sign["x_s_common"],
+                "X-B3-Traceid": sign["x_b3_traceid"],
+            }
+        )
+        return headers
+
+    async def fetch_post(
+            self,
+            note_url: str,
+            with_comments: bool = False,
+            max_comments: int = 0,
+            include_sub_comments: bool = False,
+    ) -> Dict[str, Any]:
+        """
+        Fetch one XHS post by full URL (recommended with xsec_token + xsec_source).
+        """
+        if not note_url.startswith(XHS_WEB_URL):
+            note_url = await self._get_redirection_url(note_url)
+        url_info = parse_xhs_note_url(note_url)
+        try:
+            note = await self._fetch_note_by_api(
+                note_id=url_info["note_id"],
+                xsec_token=url_info["xsec_token"],
+                xsec_source=url_info["xsec_source"],
+            )
+        except Exception as e:
+            logger.error(f"Failed to fetch note by API: {url_info['note_id']}, error: {e}")
+            note = None
+
+        if note is None:
+            note = await self._fetch_note_by_html(
+                note_id=url_info["note_id"],
+                xsec_token=url_info["xsec_token"],
+                xsec_source=url_info["xsec_source"],
+            )
+        if note is None:
+            raise RuntimeError(f"Cannot fetch note detail from API or HTML: {url_info['note_id']}")
+
+        result: Dict[str, Any] = {"platform": "xhs", "note": note, "comments": [], "url": get_pure_url(note_url)}
+        if with_comments:
+            try:
+                result["comments"] = await self._fetch_comments(
+                    note_id=url_info["note_id"],
+                    xsec_token=url_info["xsec_token"],
+                    max_comments=max_comments,
+                    include_sub_comments=include_sub_comments,
+                )
+            except Exception as e:
+                logger.error(f"Failed to fetch comments for note {url_info['note_id']}, error: {e}")
+                result["comments"] = []
+        return result
+
+    async def _signed_post(self, uri: str, data: Dict[str, Any]) -> Dict[str, Any]:
+        headers = await self._sign_headers(uri=uri, data=data)
+        json_str = json.dumps(data, separators=(",", ":"), ensure_ascii=False)
+        resp = await self._http.post(
+            f"{XHS_API_URL}{uri}",
+            content=json_str.encode("utf-8"),
+            headers=headers,
+        )
+        return self._parse_api_response(resp)
+
+    async def _signed_get(self, uri: str, params: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
+        params = params or {}
+        final_uri = uri
+        if params:
+            final_uri = f"{uri}?{urlencode(params, doseq=True)}"
+        headers = await self._sign_headers(uri=final_uri)
+        resp = await self._http.get(
+            f"{XHS_API_URL}{uri}",
+            params=params,
+            headers=headers,
+        )
+        return self._parse_api_response(resp)
+
+    @staticmethod
+    def _parse_api_response(resp: httpx.Response) -> Dict[str, Any]:
+        try:
+            body = resp.json()
+        except json.JSONDecodeError as exc:
+            raise RuntimeError(f"XHS API returned non-JSON: status={resp.status_code}") from exc
+
+        if resp.status_code in (461, 471):
+            verify_type = resp.headers.get("Verifytype", "")
+            verify_uuid = resp.headers.get("Verifyuuid", "")
+            raise RuntimeError(
+                f"XHS blocked by captcha: verify_type={verify_type}, verify_uuid={verify_uuid}"
+            )
+
+        if body.get("success"):
+            return body.get("data", {}) or {}
+        raise RuntimeError(f"XHS API error: status={resp.status_code}, body={body}")
+
+    async def _fetch_note_by_api(
+            self,
+            note_id: str,
+            xsec_token: str,
+            xsec_source: str,
+    ) -> Optional[Dict[str, Any]]:
+        data: Dict[str, Any] = {
+            "source_note_id": note_id,
+            "image_formats": ["jpg", "webp", "avif"],
+            "extra": {"need_body_topic": 1},
+        }
+        if xsec_token:
+            data["xsec_token"] = xsec_token
+            data["xsec_source"] = xsec_source
+
+        payload = await self._signed_post("/api/sns/web/v1/feed", data=data)
+        items = payload.get("items", []) or []
+        if not items:
+            return None
+        card = (items[0] or {}).get("note_card", {})
+        if not card:
+            return None
+        card["xsec_token"] = xsec_token
+        card["xsec_source"] = xsec_source
+        return self._normalize_note(card)
+
+    async def _fetch_note_by_html(
+            self,
+            note_id: str,
+            xsec_token: str,
+            xsec_source: str,
+    ) -> Optional[Dict[str, Any]]:
+        detail_url = f"{XHS_WEB_URL}/explore/{note_id}"
+        if xsec_token:
+            detail_url += f"?xsec_token={xsec_token}&xsec_source={xsec_source}"
+
+        resp = await self._http.get(detail_url, headers=self._base_headers())
+        if resp.status_code != 200:
+            return None
+
+        text = resp.text or ""
+        match = re.search(r"window.__INITIAL_STATE__=({.*})</script>", text)
+        if not match:
+            return None
+
+        try:
+            state = json.loads(match.group(1).replace("undefined", "null"))
+            note = (
+                state.get("note", {})
+                .get("noteDetailMap", {})
+                .get(note_id, {})
+                .get("note")
+            )
+        except json.JSONDecodeError:
+            return None
+
+        if not note:
+            return None
+        note["xsec_token"] = xsec_token
+        note["xsec_source"] = xsec_source
+        return self._normalize_note(note)
+
+    async def _fetch_comments(
+            self,
+            note_id: str,
+            xsec_token: str = "",
+            max_comments: int = 0,
+            include_sub_comments: bool = False,
+    ) -> List[Dict[str, Any]]:
+        comments: List[Dict[str, Any]] = []
+        cursor = ""
+        has_more = True
+
+        while has_more:
+            params: Dict[str, Any] = {
+                "note_id": note_id,
+                "cursor": cursor,
+                "top_comment_id": "",
+                "image_formats": "jpg,webp,avif",
+            }
+            if xsec_token:
+                params["xsec_token"] = xsec_token
+
+            payload = await self._signed_get("/api/sns/web/v2/comment/page", params=params)
+            raw_comments = payload.get("comments", []) or []
+            for item in raw_comments:
+                comments.append(
+                    self._normalize_comment(
+                        note_id=note_id,
+                        note_xsec_token=xsec_token,
+                        raw=item,
+                        root_comment_id="",
+                    )
+                )
+                if include_sub_comments:
+                    comments.extend(
+                        await self._fetch_sub_comments(
+                            note_id=note_id,
+                            root_comment=item,
+                            xsec_token=xsec_token,
+                        )
+                    )
+                if max_comments > 0 and len(comments) >= max_comments:
+                    return comments[:max_comments]
+
+            has_more = bool(payload.get("has_more", False))
+            cursor = str(payload.get("cursor", "") or "")
+            if not cursor and has_more:
+                break
+
+        return comments
+
+    async def _fetch_sub_comments(
+            self,
+            note_id: str,
+            root_comment: Dict[str, Any],
+            xsec_token: str,
+    ) -> List[Dict[str, Any]]:
+        results: List[Dict[str, Any]] = []
+        inline_sub_comments = root_comment.get("sub_comments", []) or []
+        root_comment_id = str(root_comment.get("id", ""))
+
+        for item in inline_sub_comments:
+            results.append(
+                self._normalize_comment(
+                    note_id=note_id,
+                    note_xsec_token=xsec_token,
+                    raw=item,
+                    root_comment_id=root_comment_id,
+                )
+            )
+
+        has_more = bool(root_comment.get("sub_comment_has_more", False))
+        cursor = str(root_comment.get("sub_comment_cursor", "") or "")
+        while has_more:
+            params: Dict[str, Any] = {
+                "note_id": note_id,
+                "root_comment_id": root_comment_id,
+                "num": 10,
+                "cursor": cursor,
+            }
+            if xsec_token:
+                params["xsec_token"] = xsec_token
+            payload = await self._signed_get("/api/sns/web/v2/comment/sub/page", params=params)
+            sub_comments = payload.get("comments", []) or []
+            for item in sub_comments:
+                results.append(
+                    self._normalize_comment(
+                        note_id=note_id,
+                        note_xsec_token=xsec_token,
+                        raw=item,
+                        root_comment_id=root_comment_id,
+                    )
+                )
+            has_more = bool(payload.get("has_more", False))
+            cursor = str(payload.get("cursor", "") or "")
+            if not cursor and has_more:
+                break
+        return results
+
+    def _normalize_note(self, note_item: Dict[str, Any]) -> Dict[str, Any]:
+        def _pick(data: Dict[str, Any], *keys: str, default: Any = None) -> Any:
+            if not isinstance(data, dict):
+                return default
+            for key in keys:
+                if key in data and data.get(key) is not None:
+                    return data.get(key)
+            return default
+
+        def _to_int(value: Any) -> int:
+            try:
+                return int(value or 0)
+            except (TypeError, ValueError):
+                return 0
+
+        user = _pick(note_item, "user", default={}) or {}
+        interact = _pick(note_item, "interact_info", "interactInfo", default={}) or {}
+        image_list = []
+        for item in _pick(note_item, "image_list", "imageList", default=[]) or []:
+            url = (
+                _pick(item, "url_default", "urlDefault", "url")
+                if isinstance(item, dict)
+                else None
+            )
+            if url:
+                image_list.append(str(url))
+
+        tag_list = []
+        for tag in _pick(note_item, "tag_list", "tagList", default=[]) or []:
+            if not isinstance(tag, dict):
+                continue
+            if tag.get("type") == "topic" and tag.get("name"):
+                tag_list.append(str(tag["name"]))
+
+        video_urls: List[str] = self._extract_video_urls(note_item)
+        note_id = str(_pick(note_item, "note_id", "noteId", default=""))
+        xsec_token = str(_pick(note_item, "xsec_token", "xsecToken", default=""))
+        xsec_source = str(
+            _pick(note_item, "xsec_source", "xsecSource", default="pc_search")
+            or "pc_search"
+        )
+        note_url = (
+            f"{XHS_WEB_URL}/explore/{note_id}"
+            f"?xsec_token={xsec_token}&xsec_source={xsec_source}"
+        )
+
+        return {
+            "note_id": note_id,
+            "type": str(_pick(note_item, "type", default="")),
+            "title": str(
+                _pick(note_item, "title", default="")
+                or str(_pick(note_item, "desc", default=""))[:255]
+            ),
+            "desc": str(_pick(note_item, "desc", default="")),
+            "video_urls": video_urls,
+            "time": str(_pick(note_item, "time", default="")),
+            "last_update_time": str(
+                _pick(note_item, "last_update_time", "lastUpdateTime", default="")
+            ),
+            "ip_location": str(_pick(note_item, "ip_location", "ipLocation", default="")),
+            "image_list": image_list,
+            "tag_list": tag_list,
+            "url": note_url,
+            "note_url": note_url,
+            "liked_count": _to_int(_pick(interact, "liked_count", "likedCount", default=0)),
+            "collected_count": _to_int(
+                _pick(interact, "collected_count", "collectedCount", default=0)
+            ),
+            "comment_count": _to_int(
+                _pick(interact, "comment_count", "commentCount", default=0)
+            ),
+            "share_count": _to_int(_pick(interact, "share_count", "shareCount", default=0)),
+            "user": {
+                "user_id": str(_pick(user, "user_id", "userId", default="")),
+                "nickname": str(_pick(user, "nickname", default="")),
+                "avatar": str(_pick(user, "avatar", "image", default="")),
+            },
+        }
+
+    @staticmethod
+    def _extract_video_urls(note_item: Dict[str, Any]) -> List[str]:
+        if note_item.get("type") != "video":
+            return []
+        video = note_item.get("video", {}) or {}
+        consumer = video.get("consumer", {}) or {}
+        origin_video_key = consumer.get("origin_video_key") or consumer.get("originVideoKey")
+        if origin_video_key:
+            return [f"http://sns-video-bd.xhscdn.com/{origin_video_key}"]
+
+        urls = []
+        h264_list = (
+                (video.get("media", {}) or {})
+                .get("stream", {})
+                .get("h264", [])
+                or []
+        )
+        for item in h264_list:
+            master_url = item.get("master_url")
+            if master_url:
+                urls.append(master_url)
+        return urls
+
+    def _normalize_comment(
+            self,
+            note_id: str,
+            note_xsec_token: str,
+            raw: Dict[str, Any],
+            root_comment_id: str = "",
+    ) -> Dict[str, Any]:
+        user = raw.get("user_info", {}) or {}
+        target = raw.get("target_comment", {}) or {}
+        pics = []
+        for item in raw.get("pictures", []) or []:
+            url = item.get("url_default")
+            if url:
+                pics.append(url)
+
+        return {
+            "comment_id": str(raw.get("id", "")),
+            "parent_comment_id": root_comment_id,
+            "target_comment_id": str(target.get("id", "")),
+            "note_id": note_id,
+            "content": str(raw.get("content", "")),
+            "create_time": str(raw.get("create_time", "")),
+            "ip_location": str(raw.get("ip_location", "")),
+            "sub_comment_count": int(raw.get("sub_comment_count", 0) or 0),
+            "like_count": int(raw.get("like_count", 0) or 0),
+            "pictures": pics,
+            "note_url": (
+                f"{XHS_WEB_URL}/explore/{note_id}"
+                f"?xsec_token={note_xsec_token}&xsec_source=pc_search"
+            ),
+            "user": {
+                "user_id": str(user.get("user_id", "")),
+                "nickname": str(user.get("nickname", "")),
+                "avatar": str(user.get("image", "")),
+            },
+        }
+
+    async def _get_redirection_url(self, note_url: str) -> str:
+        """Follow redirects from a short URL (e.g. xhslink.com) to the full xiaohongshu.com URL."""
+        redirect_headers = {
+            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+            "accept-language": "zh-CN,zh;q=0.9,en;q=0.8",
+            "user-agent": (
+                "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
+                "AppleWebKit/537.36 (KHTML, like Gecko) "
+                "Chrome/137.0.0.0 Safari/537.36"
+            ),
+        }
+        async with httpx.AsyncClient(timeout=self.timeout, follow_redirects=True) as client:
+            resp = await client.get(note_url, headers=redirect_headers)
+
+        final_url = str(resp.url)
+        if XHS_WEB_URL not in final_url and "xiaohongshu.com" not in final_url:
+            raise RuntimeError(
+                f"Short URL did not redirect to xiaohongshu.com: {note_url} -> {final_url}"
+            )
+        return final_url
diff --git a/apps/api/src/templates/xiaohongshu_content.jinja2 b/apps/api/src/templates/xiaohongshu_content.jinja2
index d26fbed..2bda357 100644
--- a/apps/api/src/templates/xiaohongshu_content.jinja2
+++ b/apps/api/src/templates/xiaohongshu_content.jinja2
@@ -1,7 +1,7 @@
 <h2>{{ data.title }}</h2>
-<p>作者：<a href="{{ author_url }}">{{ data.author }}</a></p>
+<p>作者：<a href="{{ data.author_url }}">{{ data.author }}</a></p>
 <p>发布于 {{ data.created }} 最近更新于 {{ data.updated }}</p>
-<p>收藏：{{ data.collected_count }} 转发：{{ data.share_count }} 评论：{{ data.comments_count }} 点赞：{{ data.like_count }}
+<p>收藏：{{ data.collected_count }} 转发：{{ data.share_count }} 评论：{{ data.comment_count }} 点赞：{{ data.like_count }}
 {% if data.ip_location %}
 来自{{ data.ip_location }}
 {% endif %}
diff --git a/packages/shared/fastfetchbot_shared/config.py b/packages/shared/fastfetchbot_shared/config.py
index ec80059..fca6987 100644
--- a/packages/shared/fastfetchbot_shared/config.py
+++ b/packages/shared/fastfetchbot_shared/config.py
@@ -17,3 +17,7 @@
 
 # Utils environment variables
 HTTP_REQUEST_TIMEOUT = env.get("HTTP_REQUEST_TIMEOUT", 30)
+
+# XHS (Xiaohongshu) shared configuration
+SIGN_SERVER_URL = env.get("SIGN_SERVER_URL", "http://localhost:8989")
+XHS_COOKIE_PATH = env.get("XHS_COOKIE_PATH", "")
diff --git a/packages/shared/fastfetchbot_shared/utils/config.py b/packages/shared/fastfetchbot_shared/utils/config.py
index bad822d..2406a07 100644
--- a/packages/shared/fastfetchbot_shared/utils/config.py
+++ b/packages/shared/fastfetchbot_shared/utils/config.py
@@ -51,5 +51,6 @@
 BANNED_PATTERNS = [
     r"chatgpt\.com\/share\/[A-Za-z0-9]+",
     r"gemini\/share\/[A-Za-z0-9]+",
-    r"t\.me\/[A-Za-z0-9]+"
+    r"t\.me\/[A-Za-z0-9]+",
+    r"github.com\/[A-Za-z0-9_-]+\/[A-Za-z0-9_-]+",
 ]
diff --git a/template.env b/template.env
index 429a75a..b5c6038 100644
--- a/template.env
+++ b/template.env
@@ -78,6 +78,14 @@ XIAOHONGSHU_WEBID=
 # The websession cookie of xiaohongshu. Default: `None`
 XIAOHONGSHU_WEBSESSION=
 
+# XHS Sign Server URL for the signing proxy. Default: `http://localhost:8989`
+SIGN_SERVER_URL=
+
+# Path to a file containing XHS cookies as a single-line string.
+# Default: conf/xhs_cookies.txt (relative to apps/api). Override with an absolute path if needed.
+# Takes priority over XIAOHONGSHU_A1/WEBID/WEBSESSION when the file exists.
+XHS_COOKIE_PATH=
+
 # OpenAI
 # The api key of OpenAI. Default: `None`
 OPENAI_API_KEY=

From e43dfadfcdb633534af71043b7b8d9728175bc3a Mon Sep 17 00:00:00 2001
From: aturret <enturreopy@gmail.com>
Date: Mon, 23 Feb 2026 16:57:53 -0600
Subject: [PATCH 4/8] clean: remove old copied code

---
 .../scrapers/xiaohongshu/xhs/__init__.py      |   2 -
 .../scrapers/xiaohongshu/xhs/base_crawler.py  |  35 ---
 .../scrapers/xiaohongshu/xhs/client.py        | 217 ---------------
 .../services/scrapers/xiaohongshu/xhs/core.py | 225 ---------------
 .../scrapers/xiaohongshu/xhs/exception.py     |   9 -
 .../scrapers/xiaohongshu/xhs/field.py         |  72 -----
 .../services/scrapers/xiaohongshu/xhs/help.py | 262 ------------------
 .../scrapers/xiaohongshu/xhs/login.py         | 132 ---------
 .../xiaohongshu/xhs/proxy_account_pool.py     | 132 ---------
 .../scrapers/xiaohongshu/xhs/utils.py         | 146 ----------
 10 files changed, 1232 deletions(-)
 delete mode 100644 apps/api/src/services/scrapers/xiaohongshu/xhs/__init__.py
 delete mode 100644 apps/api/src/services/scrapers/xiaohongshu/xhs/base_crawler.py
 delete mode 100644 apps/api/src/services/scrapers/xiaohongshu/xhs/client.py
 delete mode 100644 apps/api/src/services/scrapers/xiaohongshu/xhs/core.py
 delete mode 100644 apps/api/src/services/scrapers/xiaohongshu/xhs/exception.py
 delete mode 100644 apps/api/src/services/scrapers/xiaohongshu/xhs/field.py
 delete mode 100644 apps/api/src/services/scrapers/xiaohongshu/xhs/help.py
 delete mode 100644 apps/api/src/services/scrapers/xiaohongshu/xhs/login.py
 delete mode 100644 apps/api/src/services/scrapers/xiaohongshu/xhs/proxy_account_pool.py
 delete mode 100644 apps/api/src/services/scrapers/xiaohongshu/xhs/utils.py

diff --git a/apps/api/src/services/scrapers/xiaohongshu/xhs/__init__.py b/apps/api/src/services/scrapers/xiaohongshu/xhs/__init__.py
deleted file mode 100644
index 947f034..0000000
--- a/apps/api/src/services/scrapers/xiaohongshu/xhs/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-from .core import XiaoHongShuCrawler
-from .field import *
diff --git a/apps/api/src/services/scrapers/xiaohongshu/xhs/base_crawler.py b/apps/api/src/services/scrapers/xiaohongshu/xhs/base_crawler.py
deleted file mode 100644
index d00b38a..0000000
--- a/apps/api/src/services/scrapers/xiaohongshu/xhs/base_crawler.py
+++ /dev/null
@@ -1,35 +0,0 @@
-from abc import ABC, abstractmethod
-
-from .proxy_account_pool import AccountPool
-
-
-class AbstractCrawler(ABC):
-    @abstractmethod
-    def init_config(self, platform: str, login_type: str, account_pool: AccountPool):
-        pass
-
-    @abstractmethod
-    async def start(self):
-        pass
-
-    @abstractmethod
-    async def search(self):
-        pass
-
-
-class AbstractLogin(ABC):
-    @abstractmethod
-    async def begin(self):
-        pass
-
-    @abstractmethod
-    async def login_by_qrcode(self):
-        pass
-
-    @abstractmethod
-    async def login_by_mobile(self):
-        pass
-
-    @abstractmethod
-    async def login_by_cookies(self):
-        pass
diff --git a/apps/api/src/services/scrapers/xiaohongshu/xhs/client.py b/apps/api/src/services/scrapers/xiaohongshu/xhs/client.py
deleted file mode 100644
index 367ba6a..0000000
--- a/apps/api/src/services/scrapers/xiaohongshu/xhs/client.py
+++ /dev/null
@@ -1,217 +0,0 @@
-import asyncio
-import json
-from typing import Dict
-
-import httpx
-from playwright.async_api import BrowserContext, Page
-
-from fastfetchbot_shared.utils.logger import logger
-from .exception import DataFetchError, IPBlockError
-from .field import SearchNoteType, SearchSortType
-from .help import get_search_id, sign
-from . import utils
-
-
-class XHSClient:
-    def __init__(
-        self,
-        timeout=10,
-        proxies=None,
-        *,
-        headers: Dict[str, str],
-        playwright_page: Page,
-        cookie_dict: Dict[str, str],
-    ):
-        self.proxies = proxies
-        self.timeout = timeout
-        self.headers = headers
-        self._host = "https://edith.xiaohongshu.com"
-        self.IP_ERROR_STR = "网络连接异常，请检查网络设置或重启试试"
-        self.IP_ERROR_CODE = 300012
-        self.NOTE_ABNORMAL_STR = "笔记状态异常，请稍后查看"
-        self.NOTE_ABNORMAL_CODE = -510001
-        self.playwright_page = playwright_page
-        self.cookie_dict = cookie_dict
-
-    async def _pre_headers(self, url: str, data=None):
-        encrypt_params = await self.playwright_page.evaluate(
-            "([url, data]) => window._webmsxyw(url,data)", [url, data]
-        )
-        local_storage = await self.playwright_page.evaluate("() => window.localStorage")
-        signs = sign(
-            a1=self.cookie_dict.get("a1", ""),
-            b1=local_storage.get("b1", ""),
-            x_s=encrypt_params.get("X-s", ""),
-            x_t=str(encrypt_params.get("X-t", "")),
-        )
-
-        headers = {
-            "X-S": signs["x-s"],
-            "X-T": signs["x-t"],
-            "x-S-Common": signs["x-s-common"],
-            "X-B3-Traceid": signs["x-b3-traceid"],
-        }
-        self.headers.update(headers)
-        return self.headers
-
-    async def request(self, method, url, **kwargs) -> Dict:
-        async with httpx.AsyncClient(proxies=self.proxies) as client:
-            response = await client.request(method, url, timeout=self.timeout, **kwargs)
-        data: Dict = response.json()
-        if data["success"]:
-            return data.get("data", data.get("success", {}))
-        elif data["code"] == self.IP_ERROR_CODE:
-            raise IPBlockError(self.IP_ERROR_STR)
-        else:
-            raise DataFetchError(data.get("msg", None))
-
-    async def get(self, uri: str, params=None) -> Dict:
-        final_uri = uri
-        if isinstance(params, dict):
-            final_uri = f"{uri}?" f"{'&'.join([f'{k}={v}' for k, v in params.items()])}"
-        headers = await self._pre_headers(final_uri)
-        return await self.request(
-            method="GET", url=f"{self._host}{final_uri}", headers=headers
-        )
-
-    async def post(self, uri: str, data: dict) -> Dict:
-        headers = await self._pre_headers(uri, data)
-        json_str = json.dumps(data, separators=(",", ":"), ensure_ascii=False)
-        return await self.request(
-            method="POST", url=f"{self._host}{uri}", data=json_str, headers=headers
-        )
-
-    async def ping(self) -> bool:
-        """get a note to check if login state is ok"""
-        logger.info("Begin to ping xhs...")
-        ping_flag = False
-        try:
-            note_card: Dict = await self.get_note_by_keyword(keyword="小红书")
-            if note_card.get("items"):
-                ping_flag = True
-        except Exception as e:
-            logger.error(f"Ping xhs failed: {e}, and try to login again...")
-            ping_flag = False
-        return ping_flag
-
-    async def update_cookies(self, browser_context: BrowserContext):
-        cookie_str, cookie_dict = utils.convert_cookies(await browser_context.cookies())
-        self.headers["Cookie"] = cookie_str
-        self.cookie_dict = cookie_dict
-
-    async def get_note_by_keyword(
-        self,
-        keyword: str,
-        page: int = 1,
-        page_size: int = 20,
-        sort: SearchSortType = SearchSortType.GENERAL,
-        note_type: SearchNoteType = SearchNoteType.ALL,
-    ) -> Dict:
-        """search note by keyword
-
-        :param keyword: what notes you want to search
-        :param page: page number, defaults to 1
-        :param page_size: page size, defaults to 20
-        :param sort: sort ordering, defaults to SearchSortType.GENERAL
-        :param note_type: note type, defaults to SearchNoteType.ALL
-        :return: {has_more: true, items: []}
-        """
-        uri = "/api/sns/web/v1/search/notes"
-        data = {
-            "keyword": keyword,
-            "page": page,
-            "page_size": page_size,
-            "search_id": get_search_id(),
-            "sort": sort.value,
-            "note_type": note_type.value,
-        }
-        return await self.post(uri, data)
-
-    async def get_note_by_id(self, note_id: str) -> Dict:
-        """
-        :param note_id: note_id you want to fetch
-        :return: {"time":1679019883000,"user":{"nickname":"nickname","avatar":"avatar","user_id":"user_id"},"image_list":[{"url":"https://sns-img-qc.xhscdn.com/c8e505ca-4e5f-44be-fe1c-ca0205a38bad","trace_id":"1000g00826s57r6cfu0005ossb1e9gk8c65d0c80","file_id":"c8e505ca-4e5f-44be-fe1c-ca0205a38bad","height":1920,"width":1440}],"tag_list":[{"id":"5be78cdfdb601f000100d0bc","name":"jk","type":"topic"}],"desc":"裙裙","interact_info":{"followed":false,"liked":false,"liked_count":"1732","collected":false,"collected_count":"453","comment_count":"30","share_count":"41"},"at_user_list":[],"last_update_time":1679019884000,"note_id":"6413cf6b00000000270115b5","type":"normal","title":"title"}
-        """
-        data = {"source_note_id": note_id}
-        uri = "/api/sns/web/v1/feed"
-        res = await self.post(uri, data)
-        res_dict: Dict = res["items"][0]["note_card"]
-        return res_dict
-
-    async def get_note_comments(self, note_id: str, cursor: str = "") -> Dict:
-        """get note comments
-        :param note_id: note id you want to fetch
-        :param cursor: last you get cursor, defaults to ""
-        :return: {"has_more": true,"cursor": "6422442d000000000700dcdb",comments: [],"user_id": "63273a77000000002303cc9b","time": 1681566542930}
-        """
-        uri = "/api/sns/web/v2/comment/page"
-        params = {"note_id": note_id, "cursor": cursor}
-        return await self.get(uri, params)
-
-    async def get_note_sub_comments(
-        self, note_id: str, root_comment_id: str, num: int = 30, cursor: str = ""
-    ):
-        """
-        get note sub comments
-        :param note_id: note id you want to fetch
-        :param root_comment_id: parent comment id
-        :param num: recommend 30, if num greater 30, it only return 30 comments
-        :param cursor: last you get cursor, defaults to ""
-        :return: {"has_more": true,"cursor": "6422442d000000000700dcdb",comments: [],"user_id": "63273a77000000002303cc9b","time": 1681566542930}
-        """
-        uri = "/api/sns/web/v2/comment/sub/page"
-        params = {
-            "note_id": note_id,
-            "root_comment_id": root_comment_id,
-            "num": num,
-            "cursor": cursor,
-        }
-        return await self.get(uri, params)
-
-    async def get_note_all_comments(
-        self, note_id: str, crawl_interval: float = 1.0, is_fetch_sub_comments=False
-    ):
-        """
-        get note all comments include sub comments
-        :param note_id:
-        :param crawl_interval:
-        :param is_fetch_sub_comments:
-        :return:
-        """
-
-        result = []
-        comments_has_more = True
-        comments_cursor = ""
-        while comments_has_more:
-            comments_res = await self.get_note_comments(note_id, comments_cursor)
-            comments_has_more = comments_res.get("has_more", False)
-            comments_cursor = comments_res.get("cursor", "")
-            comments = comments_res["comments"]
-            if not is_fetch_sub_comments:
-                result.extend(comments)
-                continue
-            # handle get sub comments
-            for comment in comments:
-                result.append(comment)
-                cur_sub_comment_count = int(comment["sub_comment_count"])
-                cur_sub_comments = comment["sub_comments"]
-                result.extend(cur_sub_comments)
-                sub_comments_has_more = (
-                    comment["sub_comment_has_more"]
-                    and len(cur_sub_comments) < cur_sub_comment_count
-                )
-                sub_comment_cursor = comment["sub_comment_cursor"]
-                while sub_comments_has_more:
-                    page_num = 30
-                    sub_comments_res = await self.get_note_sub_comments(
-                        note_id, comment["id"], num=page_num, cursor=sub_comment_cursor
-                    )
-                    sub_comments = sub_comments_res["comments"]
-                    sub_comments_has_more = (
-                        sub_comments_res["has_more"] and len(sub_comments) == page_num
-                    )
-                    sub_comment_cursor = sub_comments_res["cursor"]
-                    result.extend(sub_comments)
-                    await asyncio.sleep(crawl_interval)
-            await asyncio.sleep(crawl_interval)
-        return result
diff --git a/apps/api/src/services/scrapers/xiaohongshu/xhs/core.py b/apps/api/src/services/scrapers/xiaohongshu/xhs/core.py
deleted file mode 100644
index 8e450a7..0000000
--- a/apps/api/src/services/scrapers/xiaohongshu/xhs/core.py
+++ /dev/null
@@ -1,225 +0,0 @@
-import asyncio
-import random
-from asyncio import Task
-from typing import Dict, List, Optional, Tuple
-
-from playwright.async_api import BrowserContext, BrowserType, Page, async_playwright
-
-from .base_crawler import AbstractCrawler
-from .proxy_account_pool import AccountPool
-
-# import xiaohongshu as xhs_model
-from fastfetchbot_shared.utils.logger import logger
-from src import config
-
-# from var import request_keyword_var
-from .client import XHSClient
-from .exception import DataFetchError
-from .login import XHSLogin
-from . import utils
-
-
-class XiaoHongShuCrawler(AbstractCrawler):
-    platform: str
-    login_type: str
-    context_page: Page
-    xhs_client: XHSClient
-    account_pool: AccountPool
-    browser_context: BrowserContext
-
-    def __init__(self) -> None:
-        self.index_url = "https://www.xiaohongshu.com"
-        self.user_agent = utils.get_user_agent()
-
-    def init_config(
-        self, platform: str, login_type: str, account_pool: AccountPool
-    ) -> None:
-        self.platform = platform
-        self.login_type = login_type
-        self.account_pool = account_pool
-
-    async def start(self, id: str) -> dict:
-        account_phone, playwright_proxy, httpx_proxy = self.create_proxy_info()
-        async with async_playwright() as playwright:
-            # Launch a browser context.
-            chromium = playwright.chromium
-            self.browser_context = await self.launch_browser(
-                chromium, playwright_proxy, self.user_agent, headless=True
-            )
-            # stealth.min.js is a js script to prevent the website from detecting the crawler.
-            await self.browser_context.add_init_script(path="app/utils/stealth.min.js")
-            # add a cookie attribute webId to avoid the appearance of a sliding captcha on the webpage
-            await self.browser_context.add_cookies(
-                [
-                    {
-                        "name": "webId",
-                        "value": "xxx123",  # any value
-                        "domain": ".xiaohongshu.com",
-                        "path": "/",
-                    }
-                ]
-            )
-            await asyncio.sleep(1)
-            self.context_page = await self.browser_context.new_page()
-            await asyncio.sleep(1)
-            await self.context_page.goto(self.index_url)
-            await asyncio.sleep(1)
-            # Create a client to interact with the xiaohongshu website.
-            self.xhs_client = await self.create_xhs_client(httpx_proxy)
-            if not await self.xhs_client.ping():
-                login_obj = XHSLogin(
-                    login_type=self.login_type,
-                    login_phone=account_phone,
-                    browser_context=self.browser_context,
-                    context_page=self.context_page,
-                    cookie_str=config.XIAOHONGSHU_COOKIES,
-                )
-                await asyncio.sleep(1)
-                await login_obj.begin()
-                await asyncio.sleep(1)
-                await self.xhs_client.update_cookies(
-                    browser_context=self.browser_context
-                )
-
-            # Search for notes and retrieve their comment information.
-            # await self.search()
-
-            logger.info("Xhs Crawler finished ...")
-            return await self.xhs_client.get_note_by_id(id)
-
-    async def search(self) -> None:
-        """Search for notes and retrieve their comment information."""
-        logger.info("Begin search xiaohongshu keywords")
-        xhs_limit_count = 20  # xhs limit page fixed value
-        for keyword in config.KEYWORDS.split(","):
-            # set keyword to context var
-            # request_keyword_var.set(keyword)
-            logger.info(f"Current search keyword: {keyword}")
-            page = 1
-            while page * xhs_limit_count <= config.CRAWLER_MAX_NOTES_COUNT:
-                note_id_list: List[str] = []
-                notes_res = await self.xhs_client.get_note_by_keyword(
-                    keyword=keyword,
-                    page=page,
-                )
-                semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM)
-                task_list = [
-                    self.get_note_detail(post_item.get("id"), semaphore)
-                    for post_item in notes_res.get("items", {})
-                    if post_item.get("model_type") not in ("rec_query", "hot_query")
-                ]
-                note_details = await asyncio.gather(*task_list)
-                for note_detail in note_details:
-                    if note_detail is not None:
-                        # await xhs_model.update_xhs_note(note_detail)
-                        note_id_list.append(note_detail.get("note_id"))
-                page += 1
-                logger.info(f"Note details: {note_details}")
-                await self.batch_get_note_comments(note_id_list)
-
-    async def get_note_detail(
-        self, note_id: str, semaphore: asyncio.Semaphore
-    ) -> Optional[Dict]:
-        """Get note detail"""
-        async with semaphore:
-            try:
-                return await self.xhs_client.get_note_by_id(note_id)
-            except DataFetchError as ex:
-                logger.error(f"Get note detail error: {ex}")
-                return None
-
-    async def batch_get_note_comments(self, note_list: List[str]):
-        """Batch get note comments"""
-        logger.info(f"Begin batch get note comments, note list: {note_list}")
-        semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM)
-        task_list: List[Task] = []
-        for note_id in note_list:
-            task = asyncio.create_task(
-                self.get_comments(note_id, semaphore), name=note_id
-            )
-            task_list.append(task)
-        await asyncio.gather(*task_list)
-
-    async def get_comments(self, note_id: str, semaphore: asyncio.Semaphore):
-        """Get note comments"""
-        async with semaphore:
-            logger.info(f"Begin get note id comments {note_id}")
-            all_comments = await self.xhs_client.get_note_all_comments(
-                note_id=note_id, crawl_interval=random.random()
-            )
-            # for comment in all_comments:
-            #     await xhs_model.update_xhs_note_comment(
-            #         note_id=note_id, comment_item=comment
-            #     )
-
-    def create_proxy_info(self) -> Tuple[Optional[str], Optional[Dict], Optional[str]]:
-        """Create proxy info for playwright and httpx"""
-        # phone: 13012345671  ip_proxy: 111.122.xx.xx1:8888
-        phone, ip_proxy = self.account_pool.get_account()
-        if not config.XHS_ENABLE_IP_PROXY:
-            return phone, None, None
-        logger.info("Begin proxy info for playwright and httpx ...")
-        playwright_proxy = {
-            "server": f"{config.IP_PROXY_PROTOCOL}{ip_proxy}",
-            "username": config.IP_PROXY_USER,
-            "password": config.IP_PROXY_PASSWORD,
-        }
-        httpx_proxy = f"{config.IP_PROXY_PROTOCOL}{config.IP_PROXY_USER}:{config.IP_PROXY_PASSWORD}@{ip_proxy}"
-        return phone, playwright_proxy, httpx_proxy
-
-    async def create_xhs_client(self, httpx_proxy: Optional[str]) -> XHSClient:
-        """Create xhs client"""
-        logger.info("Begin create xiaohongshu API client ...")
-        cookie_str, cookie_dict = utils.convert_cookies(
-            await self.browser_context.cookies()
-        )
-        xhs_client_obj = XHSClient(
-            proxies=httpx_proxy,
-            headers={
-                "User-Agent": self.user_agent,
-                "Cookie": cookie_str,
-                "Origin": "https://www.xiaohongshu.com",
-                "Referer": "https://www.xiaohongshu.com",
-                "Content-Type": "application/json;charset=UTF-8",
-            },
-            playwright_page=self.context_page,
-            cookie_dict=cookie_dict,
-        )
-        return xhs_client_obj
-
-    async def launch_browser(
-        self,
-        chromium: BrowserType,
-        playwright_proxy: Optional[Dict],
-        user_agent: Optional[str],
-        headless: bool = True,
-    ) -> BrowserContext:
-        """Launch browser and create browser context"""
-        logger.info("Begin create browser context ...")
-        if config.XHS_SAVE_LOGIN_STATE:
-            # feat issue #14
-            # we will save login state to avoid login every time
-            user_data_dir = config.TEMP_DIR
-            # user_data_dir = os.path.join(
-            #     os.getcwd(), "browser_data",  self.platform
-            # )  # type: ignore
-            browser_context = await chromium.launch_persistent_context(
-                user_data_dir=user_data_dir,
-                accept_downloads=True,
-                headless=headless,
-                proxy=playwright_proxy,  # type: ignore
-                viewport={"width": 1920, "height": 1080},
-                user_agent=user_agent,
-            )
-            return browser_context
-        else:
-            browser = await chromium.launch(headless=headless, proxy=playwright_proxy)  # type: ignore
-            browser_context = await browser.new_context(
-                viewport={"width": 1920, "height": 1080}, user_agent=user_agent
-            )
-            return browser_context
-
-    async def close(self):
-        """Close browser context"""
-        await self.browser_context.close()
-        logger.info("Browser context closed ...")
diff --git a/apps/api/src/services/scrapers/xiaohongshu/xhs/exception.py b/apps/api/src/services/scrapers/xiaohongshu/xhs/exception.py
deleted file mode 100644
index 1a8642e..0000000
--- a/apps/api/src/services/scrapers/xiaohongshu/xhs/exception.py
+++ /dev/null
@@ -1,9 +0,0 @@
-from httpx import RequestError
-
-
-class DataFetchError(RequestError):
-    """something error when fetch"""
-
-
-class IPBlockError(RequestError):
-    """fetch so fast that the server block us ip"""
diff --git a/apps/api/src/services/scrapers/xiaohongshu/xhs/field.py b/apps/api/src/services/scrapers/xiaohongshu/xhs/field.py
deleted file mode 100644
index fbac0d2..0000000
--- a/apps/api/src/services/scrapers/xiaohongshu/xhs/field.py
+++ /dev/null
@@ -1,72 +0,0 @@
-from enum import Enum
-from typing import NamedTuple
-
-
-class FeedType(Enum):
-    # 推荐
-    RECOMMEND = "homefeed_recommend"
-    # 穿搭
-    FASION = "homefeed.fashion_v3"
-    # 美食
-    FOOD = "homefeed.food_v3"
-    # 彩妆
-    COSMETICS = "homefeed.cosmetics_v3"
-    # 影视
-    MOVIE = "homefeed.movie_and_tv_v3"
-    # 职场
-    CAREER = "homefeed.career_v3"
-    # 情感
-    EMOTION = "homefeed.love_v3"
-    # 家居
-    HOURSE = "homefeed.household_product_v3"
-    # 游戏
-    GAME = "homefeed.gaming_v3"
-    # 旅行
-    TRAVEL = "homefeed.travel_v3"
-    # 健身
-    FITNESS = "homefeed.fitness_v3"
-
-
-class NoteType(Enum):
-    NORMAL = "normal"
-    VIDEO = "video"
-
-
-class SearchSortType(Enum):
-    """search sort type"""
-    # default
-    GENERAL = "general"
-    # most popular
-    MOST_POPULAR = "popularity_descending"
-    # Latest
-    LATEST = "time_descending"
-
-
-class SearchNoteType(Enum):
-    """search note type
-    """
-    # default
-    ALL = 0
-    # only video
-    VIDEO = 1
-    # only image
-    IMAGE = 2
-
-
-class Note(NamedTuple):
-    """note tuple"""
-    note_id: str
-    title: str
-    desc: str
-    type: str
-    user: dict
-    img_urls: list
-    video_url: str
-    tag_list: list
-    at_user_list: list
-    collected_count: str
-    comment_count: str
-    liked_count: str
-    share_count: str
-    time: int
-    last_update_time: int
diff --git a/apps/api/src/services/scrapers/xiaohongshu/xhs/help.py b/apps/api/src/services/scrapers/xiaohongshu/xhs/help.py
deleted file mode 100644
index c1e191f..0000000
--- a/apps/api/src/services/scrapers/xiaohongshu/xhs/help.py
+++ /dev/null
@@ -1,262 +0,0 @@
-import ctypes
-import json
-import random
-import time
-import urllib.parse
-
-
-def sign(a1="", b1="", x_s="", x_t=""):
-    """
-    takes in a URI (uniform resource identifier), an optional data dictionary, and an optional ctime parameter. It returns a dictionary containing two keys: "x-s" and "x-t".
-    """
-    common = {
-        "s0": 5,  # getPlatformCode
-        "s1": "",
-        "x0": "1",  # localStorage.getItem("b1b1")
-        "x1": "3.3.0",  # version
-        "x2": "Windows",
-        "x3": "xhs-pc-web",
-        "x4": "1.4.4",
-        "x5": a1,  # cookie of a1
-        "x6": x_t,
-        "x7": x_s,
-        "x8": b1,  # localStorage.getItem("b1")
-        "x9": mrc(x_t + x_s + b1),
-        "x10": 1,  # getSigCount
-    }
-    encode_str = encodeUtf8(json.dumps(common, separators=(',', ':')))
-    x_s_common = b64Encode(encode_str)
-    x_b3_traceid = get_b3_trace_id()
-    return {
-        "x-s": x_s,
-        "x-t": x_t,
-        "x-s-common": x_s_common,
-        "x-b3-traceid": x_b3_traceid
-    }
-
-
-def get_b3_trace_id():
-    re = "abcdef0123456789"
-    je = 16
-    e = ""
-    for t in range(16):
-        e += re[random.randint(0, je - 1)]
-    return e
-
-
-def mrc(e):
-    ie = [
-        0, 1996959894, 3993919788, 2567524794, 124634137, 1886057615, 3915621685,
-        2657392035, 249268274, 2044508324, 3772115230, 2547177864, 162941995,
-        2125561021, 3887607047, 2428444049, 498536548, 1789927666, 4089016648,
-        2227061214, 450548861, 1843258603, 4107580753, 2211677639, 325883990,
-        1684777152, 4251122042, 2321926636, 335633487, 1661365465, 4195302755,
-        2366115317, 997073096, 1281953886, 3579855332, 2724688242, 1006888145,
-        1258607687, 3524101629, 2768942443, 901097722, 1119000684, 3686517206,
-        2898065728, 853044451, 1172266101, 3705015759, 2882616665, 651767980,
-        1373503546, 3369554304, 3218104598, 565507253, 1454621731, 3485111705,
-        3099436303, 671266974, 1594198024, 3322730930, 2970347812, 795835527,
-        1483230225, 3244367275, 3060149565, 1994146192, 31158534, 2563907772,
-        4023717930, 1907459465, 112637215, 2680153253, 3904427059, 2013776290,
-        251722036, 2517215374, 3775830040, 2137656763, 141376813, 2439277719,
-        3865271297, 1802195444, 476864866, 2238001368, 4066508878, 1812370925,
-        453092731, 2181625025, 4111451223, 1706088902, 314042704, 2344532202,
-        4240017532, 1658658271, 366619977, 2362670323, 4224994405, 1303535960,
-        984961486, 2747007092, 3569037538, 1256170817, 1037604311, 2765210733,
-        3554079995, 1131014506, 879679996, 2909243462, 3663771856, 1141124467,
-        855842277, 2852801631, 3708648649, 1342533948, 654459306, 3188396048,
-        3373015174, 1466479909, 544179635, 3110523913, 3462522015, 1591671054,
-        702138776, 2966460450, 3352799412, 1504918807, 783551873, 3082640443,
-        3233442989, 3988292384, 2596254646, 62317068, 1957810842, 3939845945,
-        2647816111, 81470997, 1943803523, 3814918930, 2489596804, 225274430,
-        2053790376, 3826175755, 2466906013, 167816743, 2097651377, 4027552580,
-        2265490386, 503444072, 1762050814, 4150417245, 2154129355, 426522225,
-        1852507879, 4275313526, 2312317920, 282753626, 1742555852, 4189708143,
-        2394877945, 397917763, 1622183637, 3604390888, 2714866558, 953729732,
-        1340076626, 3518719985, 2797360999, 1068828381, 1219638859, 3624741850,
-        2936675148, 906185462, 1090812512, 3747672003, 2825379669, 829329135,
-        1181335161, 3412177804, 3160834842, 628085408, 1382605366, 3423369109,
-        3138078467, 570562233, 1426400815, 3317316542, 2998733608, 733239954,
-        1555261956, 3268935591, 3050360625, 752459403, 1541320221, 2607071920,
-        3965973030, 1969922972, 40735498, 2617837225, 3943577151, 1913087877,
-        83908371, 2512341634, 3803740692, 2075208622, 213261112, 2463272603,
-        3855990285, 2094854071, 198958881, 2262029012, 4057260610, 1759359992,
-        534414190, 2176718541, 4139329115, 1873836001, 414664567, 2282248934,
-        4279200368, 1711684554, 285281116, 2405801727, 4167216745, 1634467795,
-        376229701, 2685067896, 3608007406, 1308918612, 956543938, 2808555105,
-        3495958263, 1231636301, 1047427035, 2932959818, 3654703836, 1088359270,
-        936918000, 2847714899, 3736837829, 1202900863, 817233897, 3183342108,
-        3401237130, 1404277552, 615818150, 3134207493, 3453421203, 1423857449,
-        601450431, 3009837614, 3294710456, 1567103746, 711928724, 3020668471,
-        3272380065, 1510334235, 755167117,
-    ]
-    o = -1
-
-    def right_without_sign(num: int, bit: int=0) -> int:
-        val = ctypes.c_uint32(num).value >> bit
-        MAX32INT = 4294967295
-        return (val + (MAX32INT + 1)) % (2 * (MAX32INT + 1)) - MAX32INT - 1
-
-    for n in range(57):
-        o = ie[(o & 255) ^ ord(e[n])] ^ right_without_sign(o, 8)
-    return o ^ -1 ^ 3988292384
-
-
-lookup = [
-    "Z",
-    "m",
-    "s",
-    "e",
-    "r",
-    "b",
-    "B",
-    "o",
-    "H",
-    "Q",
-    "t",
-    "N",
-    "P",
-    "+",
-    "w",
-    "O",
-    "c",
-    "z",
-    "a",
-    "/",
-    "L",
-    "p",
-    "n",
-    "g",
-    "G",
-    "8",
-    "y",
-    "J",
-    "q",
-    "4",
-    "2",
-    "K",
-    "W",
-    "Y",
-    "j",
-    "0",
-    "D",
-    "S",
-    "f",
-    "d",
-    "i",
-    "k",
-    "x",
-    "3",
-    "V",
-    "T",
-    "1",
-    "6",
-    "I",
-    "l",
-    "U",
-    "A",
-    "F",
-    "M",
-    "9",
-    "7",
-    "h",
-    "E",
-    "C",
-    "v",
-    "u",
-    "R",
-    "X",
-    "5",
-]
-
-
-def tripletToBase64(e):
-    return (
-            lookup[63 & (e >> 18)] +
-            lookup[63 & (e >> 12)] +
-            lookup[(e >> 6) & 63] +
-            lookup[e & 63]
-    )
-
-
-def encodeChunk(e, t, r):
-    m = []
-    for b in range(t, r, 3):
-        n = (16711680 & (e[b] << 16)) + \
-            ((e[b + 1] << 8) & 65280) + (e[b + 2] & 255)
-        m.append(tripletToBase64(n))
-    return ''.join(m)
-
-
-def b64Encode(e):
-    P = len(e)
-    W = P % 3
-    U = []
-    z = 16383
-    H = 0
-    Z = P - W
-    while H < Z:
-        U.append(encodeChunk(e, H, Z if H + z > Z else H + z))
-        H += z
-    if 1 == W:
-        F = e[P - 1]
-        U.append(lookup[F >> 2] + lookup[(F << 4) & 63] + "==")
-    elif 2 == W:
-        F = (e[P - 2] << 8) + e[P - 1]
-        U.append(lookup[F >> 10] + lookup[63 & (F >> 4)] +
-                 lookup[(F << 2) & 63] + "=")
-    return "".join(U)
-
-
-def encodeUtf8(e):
-    b = []
-    m = urllib.parse.quote(e, safe='~()*!.\'')
-    w = 0
-    while w < len(m):
-        T = m[w]
-        if T == "%":
-            E = m[w + 1] + m[w + 2]
-            S = int(E, 16)
-            b.append(S)
-            w += 2
-        else:
-            b.append(ord(T[0]))
-        w += 1
-    return b
-
-
-def base36encode(number, alphabet='0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'):
-    """Converts an integer to a base36 string."""
-    if not isinstance(number, int):
-        raise TypeError('number must be an integer')
-
-    base36 = ''
-    sign = ''
-
-    if number < 0:
-        sign = '-'
-        number = -number
-
-    if 0 <= number < len(alphabet):
-        return sign + alphabet[number]
-
-    while number != 0:
-        number, i = divmod(number, len(alphabet))
-        base36 = alphabet[i] + base36
-
-    return sign + base36
-
-
-def base36decode(number):
-    return int(number, 36)
-
-
-def get_search_id():
-    e = int(time.time() * 1000) << 64
-    t = int(random.uniform(0, 2147483646))
-    return base36encode((e + t))
-
-
-if __name__ == '__main__':
-    a = get_b3_trace_id()
-    print(a)
diff --git a/apps/api/src/services/scrapers/xiaohongshu/xhs/login.py b/apps/api/src/services/scrapers/xiaohongshu/xhs/login.py
deleted file mode 100644
index fb671b0..0000000
--- a/apps/api/src/services/scrapers/xiaohongshu/xhs/login.py
+++ /dev/null
@@ -1,132 +0,0 @@
-import asyncio
-import functools
-import sys
-from typing import Optional
-
-# import redis
-from playwright.async_api import BrowserContext, Page
-from tenacity import RetryError, retry, retry_if_result, stop_after_attempt, wait_fixed
-
-from fastfetchbot_shared.utils.logger import logger
-
-# import config
-from .base_crawler import AbstractLogin
-from . import utils
-
-
-class XHSLogin(AbstractLogin):
-    def __init__(
-        self,
-        login_type: str,
-        browser_context: BrowserContext,
-        context_page: Page,
-        login_phone: Optional[str] = "",
-        cookie_str: dict = {},
-    ):
-        self.login_type = login_type
-        self.browser_context = browser_context
-        self.context_page = context_page
-        self.login_phone = login_phone
-        self.cookie_str = cookie_str
-
-    @retry(
-        stop=stop_after_attempt(20),
-        wait=wait_fixed(1),
-        retry=retry_if_result(lambda value: value is False),
-    )
-    async def check_login_state(self, no_logged_in_session: str) -> bool:
-        """
-        Check if the current login status is successful and return True otherwise return False
-        retry decorator will retry 20 times if the return value is False, and the retry interval is 1 second
-        if max retry times reached, raise RetryError
-        """
-        current_cookie = await self.browser_context.cookies()
-        _, cookie_dict = utils.convert_cookies(current_cookie)
-        current_web_session = cookie_dict.get("web_session")
-        if current_web_session != no_logged_in_session:
-            return True
-        return False
-
-    async def begin(self):
-        """Start login xiaohongshu"""
-        logger.info("Begin login xiaohongshu ...")
-        if self.login_type == "qrcode":
-            await self.login_by_qrcode()
-        elif self.login_type == "phone":
-            await self.login_by_mobile()
-        elif self.login_type == "cookie":
-            await self.login_by_cookies()
-        else:
-            raise ValueError(
-                "Invalid Login Type Currently only supported qrcode or phone or cookies ..."
-            )
-
-    async def login_by_mobile(self):
-        pass
-
-    async def login_by_qrcode(self):
-        """login xiaohongshu website and keep webdriver login state"""
-        logger.info("Begin login xiaohongshu by qrcode ...")
-        # login_selector = "div.login-container > div.left > div.qrcode > img"
-        qrcode_img_selector = "xpath=//img[@class='qrcode-img']"
-        # find login qrcode
-        base64_qrcode_img = await utils.find_login_qrcode(
-            self.context_page, selector=qrcode_img_selector
-        )
-        if not base64_qrcode_img:
-            logger.info("login failed , have not found qrcode please check ....")
-            # if this website does not automatically popup login dialog box, we will manual click login button
-            await asyncio.sleep(0.5)
-            login_button_ele = self.context_page.locator(
-                "xpath=//*[@id='app']/div[1]/div[2]/div[1]/ul/div[1]/button"
-            )
-            await login_button_ele.click()
-            base64_qrcode_img = await utils.find_login_qrcode(
-                self.context_page, selector=qrcode_img_selector
-            )
-            if not base64_qrcode_img:
-                sys.exit()
-
-        # get not logged session
-        current_cookie = await self.browser_context.cookies()
-        _, cookie_dict = utils.convert_cookies(current_cookie)
-        no_logged_in_session = cookie_dict.get("web_session")
-
-        # show login qrcode
-        # fix issue #12
-        # we need to use partial function to call show_qrcode function and run in executor
-        # then current asyncio event loop will not be blocked
-        partial_show_qrcode = functools.partial(utils.show_qrcode, base64_qrcode_img)
-        asyncio.get_running_loop().run_in_executor(
-            executor=None, func=partial_show_qrcode
-        )
-
-        logger.info(f"waiting for scan code login, remaining time is 20s")
-        try:
-            await self.check_login_state(no_logged_in_session)
-        except RetryError:
-            logger.info("Login xiaohongshu failed by qrcode login method ...")
-            sys.exit()
-
-        wait_redirect_seconds = 5
-        logger.info(
-            f"Login successful then wait for {wait_redirect_seconds} seconds redirect ..."
-        )
-        await asyncio.sleep(wait_redirect_seconds)
-
-    async def login_by_cookies(self):
-        """login xiaohongshu website by cookies"""
-        logger.info("Begin login xiaohongshu by cookie ...")
-        for key, value in self.cookie_str.items():
-            if key != "web_session":  # only set web_session cookie attr
-                continue
-            await self.browser_context.add_cookies(
-                [
-                    {
-                        "name": key,
-                        "value": value,
-                        "domain": ".xiaohongshu.com",
-                        "path": "/",
-                    }
-                ]
-            )
diff --git a/apps/api/src/services/scrapers/xiaohongshu/xhs/proxy_account_pool.py b/apps/api/src/services/scrapers/xiaohongshu/xhs/proxy_account_pool.py
deleted file mode 100644
index 87c9202..0000000
--- a/apps/api/src/services/scrapers/xiaohongshu/xhs/proxy_account_pool.py
+++ /dev/null
@@ -1,132 +0,0 @@
-from typing import List, Optional, Set, Tuple
-
-from src import config
-
-
-class PhonePool:
-    """phone pool class"""
-
-    def __init__(self) -> None:
-        self.phones: List[str] = []
-        self.used_phones: Set[str] = set()
-
-    def add_phone(self, phone: str) -> bool:
-        """add phone to the pool"""
-        if phone not in self.phones:
-            self.phones.append(phone)
-            return True
-        return False
-
-    def remove_phone(self, phone: str) -> bool:
-        """remove phone from the pool"""
-        if phone in self.used_phones:
-            self.phones.remove(phone)
-            self.used_phones.remove(phone)
-            return True
-        return False
-
-    def get_phone(self) -> Optional[str]:
-        """get phone and mark as used"""
-        if self.phones:
-            left_phone = self.phones.pop(0)
-            self.used_phones.add(left_phone)
-            return left_phone
-        return None
-
-    def clear(self):
-        """clear phone pool"""
-        self.phones = []
-        self.used_phones = set()
-
-
-class IPPool:
-    def __init__(self) -> None:
-        self.ips: List[str] = []
-        self.used_ips: Set[str] = set()
-
-    def add_ip(self, ip):
-        """添加ip"""
-        if ip not in self.ips:
-            self.ips.append(ip)
-            return True
-        return False
-
-    def remove_ip(self, ip: str) -> bool:
-        """remove ip"""
-        if ip in self.used_ips:
-            self.ips.remove(ip)
-            self.used_ips.remove(ip)
-            return True
-        return False
-
-    def get_ip(self) -> Optional[str]:
-        """get ip and mark as used"""
-        if self.ips:
-            left_ips = self.ips.pop(0)
-            self.used_ips.add(left_ips)
-            return left_ips
-        return None
-
-    def clear(self):
-        """clear ip pool"""
-        self.ips = []
-        self.used_ips = set()
-
-
-class AccountPool:
-    """account pool class"""
-
-    def __init__(self):
-        self.phone_pool = PhonePool()
-        self.ip_pool = IPPool()
-
-    def add_account(self, phone: str, ip: str) -> bool:
-        """add account to pool with phone and ip"""
-        if self.phone_pool.add_phone(phone) and self.ip_pool.add_ip(ip):
-            return True
-        return False
-
-    def remove_account(self, phone: str, ip: str) -> bool:
-        """remove account from pool"""
-        if self.phone_pool.remove_phone(phone) and self.ip_pool.remove_ip(ip):
-            return True
-        return False
-
-    def get_account(self) -> Tuple[str, str]:
-        """get account if no account, reload account pool"""
-        phone = self.phone_pool.get_phone()
-        ip = self.ip_pool.get_ip()
-        # if not phone or not ip:
-        #     reload_account_pool(self)
-        #     return self.get_account()
-        return phone, ip
-
-    def clear_account(self):
-        """clear account pool"""
-        self.phone_pool.clear()
-        self.ip_pool.clear()
-
-
-def reload_account_pool(apo: AccountPool):
-    """reload account pool"""
-    apo.clear_account()
-    for phone, ip in zip(config.XHS_PHONE_LIST, config.XHS_IP_PROXY_LIST):
-        apo.add_account(phone, ip)
-
-
-def create_account_pool() -> AccountPool:
-    """create account pool"""
-    apo = AccountPool()
-    reload_account_pool(apo=apo)
-    return apo
-
-
-if __name__ == "__main__":
-    import time
-
-    ac_pool = create_account_pool()
-    p, i = ac_pool.get_account()
-    while p:
-        print(f"get phone:{p}, ip proxy:{i} from account pool")
-        p, i = ac_pool.get_account()
-        time.sleep(1)
diff --git a/apps/api/src/services/scrapers/xiaohongshu/xhs/utils.py b/apps/api/src/services/scrapers/xiaohongshu/xhs/utils.py
deleted file mode 100644
index 43c2f44..0000000
--- a/apps/api/src/services/scrapers/xiaohongshu/xhs/utils.py
+++ /dev/null
@@ -1,146 +0,0 @@
-import base64
-import logging
-import os
-import random
-import re
-import time
-from io import BytesIO
-from typing import Dict, List, Optional, Tuple
-from urllib.parse import urlparse
-
-import httpx
-
-from PIL import Image, ImageDraw
-from playwright.async_api import Cookie, Page
-
-
-async def find_login_qrcode(page: Page, selector: str) -> str:
-    """find login qrcode image from target selector"""
-    try:
-        elements = await page.wait_for_selector(
-            selector=selector,
-        )
-        login_qrcode_img = await elements.get_property("src")  # type: ignore
-        return str(login_qrcode_img)
-
-    except Exception as e:
-        print(e)
-        return ""
-
-
-def show_qrcode(qr_code) -> None:  # type: ignore
-    """parse base64 encode qrcode image and show it"""
-    qr_code = qr_code.split(",")[1]
-    qr_code = base64.b64decode(qr_code)
-    image = Image.open(BytesIO(qr_code))
-
-    # Add a square border around the QR code and display it within the border to improve scanning accuracy.
-    width, height = image.size
-    new_image = Image.new("RGB", (width + 20, height + 20), color=(255, 255, 255))
-    new_image.paste(image, (10, 10))
-    draw = ImageDraw.Draw(new_image)
-    draw.rectangle((0, 0, width + 19, height + 19), outline=(0, 0, 0), width=1)
-    new_image.show()
-
-
-def get_user_agent() -> str:
-    ua_list = [
-        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
-        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.5112.79 Safari/537.36",
-        "Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36",
-        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36",
-        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.53 Safari/537.36",
-        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36",
-    ]
-    return random.choice(ua_list)
-
-
-def convert_cookies(cookies: Optional[List[Cookie]]) -> Tuple[str, Dict]:
-    if not cookies:
-        return "", {}
-    cookies_str = ";".join(
-        [f"{cookie.get('name')}={cookie.get('value')}" for cookie in cookies]
-    )
-    cookie_dict = dict()
-    for cookie in cookies:
-        cookie_dict[cookie.get("name")] = cookie.get("value")
-    return cookies_str, cookie_dict
-
-
-def convert_str_cookie_to_dict(cookie_str: str) -> Dict:
-    cookie_dict: Dict[str, str] = dict()
-    if not cookie_str:
-        return cookie_dict
-    for cookie in cookie_str.split(";"):
-        cookie = cookie.strip()
-        if not cookie:
-            continue
-        cookie_list = cookie.split("=")
-        if len(cookie_list) != 2:
-            continue
-        cookie_value = cookie_list[1]
-        if isinstance(cookie_value, list):
-            cookie_value = "".join(cookie_value)
-        cookie_dict[cookie_list[0]] = cookie_value
-    return cookie_dict
-
-
-def get_current_timestamp():
-    return int(time.time() * 1000)
-
-
-def match_interact_info_count(count_str: str) -> int:
-    if not count_str:
-        return 0
-
-    match = re.search(r"\d+", count_str)
-    if match:
-        number = match.group()
-        return int(number)
-    else:
-        return 0
-
-
-def init_loging_config():
-    level = logging.INFO
-    logging.basicConfig(
-        level=level,
-        format="%(asctime)s %(name)s %(levelname)s %(message)s ",
-        datefmt="%Y-%m-%d  %H:%M:%S",
-    )
-    _logger = logging.getLogger("MediaCrawler")
-    _logger.setLevel(level)
-    return _logger
-
-
-logger = init_loging_config()
-
-
-def get_track_simple(distance) -> List[int]:
-    track: List[int] = []
-    current = 0
-    mid = distance * 4 / 5
-    t = 0.2
-    v = 1
-
-    while current < distance:
-        if current < mid:
-            a = 4
-        else:
-            a = -3
-        v0 = v
-        v = v0 + a * t  # type: ignore
-        move = v0 * t + 1 / 2 * a * t * t
-        current += move  # type: ignore
-        track.append(round(move))
-    return track
-
-
-def get_tracks(distance: int, level: str = "easy") -> List[int]:
-    if level == "easy":
-        return get_track_simple(distance)
-    else:
-        from . import easing
-
-        _, tricks = easing.get_tracks(distance, seconds=2, ease_func="ease_out_expo")
-        return tricks

From 0d8b5c3d8d6f1bbac8590ac431afe559550d2a18 Mon Sep 17 00:00:00 2001
From: aturret <enturreopy@gmail.com>
Date: Mon, 23 Feb 2026 17:03:09 -0600
Subject: [PATCH 5/8] docs: update docs

---
 CLAUDE.md |  6 ++++--
 README.md | 42 +++++++++++++++++++++++++++++++++++++++---
 2 files changed, 43 insertions(+), 5 deletions(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index 09eb0e0..4f0b52f 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -33,7 +33,7 @@ The Telegram Bot communicates with the API server over HTTP (`API_SERVER_URL`).
 - **`main.py`** — FastAPI app setup, Sentry integration, lifecycle management
 - **`config.py`** — Environment variable handling, platform credentials
 - **`routers/`** — `scraper.py` (generic endpoint), `scraper_routers.py` (platform-specific), `inoreader.py`, `wechat.py`
-- **`services/scrapers/`** — `scraper_manager.py` orchestrates platform scrapers (twitter, weibo, bluesky, xiaohongshu, reddit, instagram, zhihu, douban, threads, wechat, general)
+- **`services/scrapers/`** — `scraper_manager.py` orchestrates platform scrapers (twitter, weibo, bluesky, xiaohongshu, reddit, instagram, zhihu, douban, threads, wechat, general); the Xiaohongshu scraper uses `xiaohongshu/adaptar.py` (`XhsSinglePostAdapter`) with an external sign server instead of the old Playwright-based crawler
 - **`services/file_export/`** — PDF generation, audio transcription (OpenAI), video download
 - **`services/amazon/s3.py`** — S3 storage integration
 - **`services/telegraph/`** — Telegraph content publishing
@@ -50,7 +50,7 @@ The Telegram Bot communicates with the API server over HTTP (`API_SERVER_URL`).
 
 ### Shared Library (`packages/shared/fastfetchbot_shared/`)
 
-- **`config.py`** — URL patterns (SOCIAL_MEDIA_WEBSITE_PATTERNS, VIDEO_WEBSITE_PATTERNS, BANNED_PATTERNS)
+- **`config.py`** — URL patterns (SOCIAL_MEDIA_WEBSITE_PATTERNS, VIDEO_WEBSITE_PATTERNS, BANNED_PATTERNS); shared env vars including `SIGN_SERVER_URL` and `XHS_COOKIE_PATH`
 - **`models/`** — `classes.py` (NamedBytesIO), `metadata_item.py`, `telegraph_item.py`, `url_metadata.py`
 - **`utils/`** — `parse.py` (URL parsing, HTML processing, `get_env_bool`), `image.py`, `logger.py`, `network.py`
 
@@ -128,6 +128,8 @@ See `template.env` for a complete reference. Key variables:
 - Most scrapers require authentication cookies/tokens
 - Use browser extension "Get cookies.txt LOCALLY" to extract cookies
 - Store Zhihu cookies in `conf/zhihu_cookies.json`
+- Store Xiaohongshu cookies in `conf/xhs_cookies.txt` (single-line cookie string, e.g. `a1=x; web_id=x; web_session=x`)
+- Xiaohongshu also requires an external **sign server** reachable at `SIGN_SERVER_URL` (default `http://localhost:8989`); the sign server is currently closed-source — you must supply your own compatible implementation
 - See `template.env` for all platform-specific variables (Twitter, Weibo, Xiaohongshu, Reddit, Instagram, Bluesky, etc.)
 
 ### Database
diff --git a/README.md b/README.md
index a0c0a49..83c037a 100644
--- a/README.md
+++ b/README.md
@@ -154,10 +154,46 @@ See `template.env` for a complete reference with comments.
 | Twitter | `TWITTER_CT0`, `TWITTER_AUTH_TOKEN` |
 | Reddit | `REDDIT_CLIENT_ID`, `REDDIT_CLIENT_SECRET`, `REDDIT_USERNAME`, `REDDIT_PASSWORD` |
 | Weibo | `WEIBO_COOKIES` |
-| Xiaohongshu | `XIAOHONGSHU_A1`, `XIAOHONGSHU_WEBID`, `XIAOHONGSHU_WEBSESSION` |
+| Xiaohongshu | See [Xiaohongshu Setup](#xiaohongshu-setup) below |
 | Instagram | `X_RAPIDAPI_KEY` |
 | Zhihu | Store cookies in `conf/zhihu_cookies.json` |
 
+#### Xiaohongshu Setup
+
+Xiaohongshu (XHS) API requests require a cryptographic signature (`x-s`, `x-t`, etc.) that must be computed by a dedicated signing proxy. FastFetchBot delegates this to an external **sign server**.
+
+> **Note:** We currently use a closed-source sign server. You will need to run your own compatible signing proxy and point `SIGN_SERVER_URL` at it.
+
+The sign server must accept `POST /signsrv/v1/xhs/sign` with a JSON body:
+
+```json
+{"uri": "/api/sns/web/v1/feed", "data": {...}, "cookies": "a1=..."}
+```
+
+and return:
+
+```json
+{"isok": true, "data": {"x_s": "...", "x_t": "...", "x_s_common": "...", "x_b3_traceid": "..."}}
+```
+
+**Cookie configuration** (two options; file takes priority):
+
+- **File (recommended):** Create `apps/api/conf/xhs_cookies.txt` containing your XHS cookies as a single line:
+  ```
+  a1=xxxxxxxx; web_id=xxxxxxxx; web_session=xxxxxxxx
+  ```
+  Log in to [xiaohongshu.com](https://www.xiaohongshu.com) in your browser, then copy the cookie values from DevTools → Application → Cookies, or use the [Get cookies.txt LOCALLY](https://chrome.google.com/webstore/detail/get-cookiestxt-locally/cclelndahbckbenkjhflpdbgdldlbecc) extension.
+
+- **Environment variables (legacy fallback):** Set `XIAOHONGSHU_A1`, `XIAOHONGSHU_WEBID`, and `XIAOHONGSHU_WEBSESSION` individually. Used only when the cookie file is absent.
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `SIGN_SERVER_URL` | `http://localhost:8989` | URL of the XHS signing proxy |
+| `XHS_COOKIE_PATH` | `conf/xhs_cookies.txt` | Path to cookie file (overrides default location) |
+| `XIAOHONGSHU_A1` | `None` | `a1` cookie value (legacy fallback) |
+| `XIAOHONGSHU_WEBID` | `None` | `web_id` cookie value (legacy fallback) |
+| `XIAOHONGSHU_WEBSESSION` | `None` | `web_session` cookie value (legacy fallback) |
+
 #### Cloud Services
 
 | Variable | Description |
@@ -193,7 +229,7 @@ See `template.env` for a complete reference with comments.
 - [x] WeChat Public Account Articles
 - [x] Zhihu
 - [x] Douban
-- [ ] Xiaohongshu
+- [x] Xiaohongshu
 
 ### Video
 
@@ -211,7 +247,7 @@ The GitHub Actions pipeline (`.github/workflows/ci.yml`) automatically builds an
 
 The HTML to Telegra.ph converter function is based on [html-telegraph-poster](https://github.com/mercuree/html-telegraph-poster). I separated it from this project as an independent Python package: [html-telegraph-poster-v2](https://github.com/aturret/html-telegraph-poster-v2).
 
-The Xiaohongshu scraper is based on [MediaCrawler](https://github.com/NanmiCoder/MediaCrawler).
+The original Xiaohongshu scraper was based on [MediaCrawler](https://github.com/NanmiCoder/MediaCrawler). The current implementation uses a custom httpx-based adapter with an external signing proxy.
 
 The Weibo scraper is based on [weiboSpider](https://github.com/dataabc/weiboSpider).
 

From 6ae16ee60c517c0910e0a11538006e6baac94117 Mon Sep 17 00:00:00 2001
From: aturret <enturreopy@gmail.com>
Date: Mon, 23 Feb 2026 17:33:50 -0600
Subject: [PATCH 6/8] Update .gitignore

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
---
 .gitignore | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 72992e5..5f5986c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -273,4 +273,3 @@ conf/*
 /.claude/
 /apps/worker/conf/
 apps/worker/celerybeat-schedule.db
-apps/worker/celerybeat-schedule.db

From 772adab89306e880ed001db402d863ab08b4818e Mon Sep 17 00:00:00 2001
From: aturret <enturreopy@gmail.com>
Date: Mon, 23 Feb 2026 17:40:00 -0600
Subject: [PATCH 7/8] Update
 packages/shared/fastfetchbot_shared/utils/config.py

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
---
 packages/shared/fastfetchbot_shared/utils/config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/shared/fastfetchbot_shared/utils/config.py b/packages/shared/fastfetchbot_shared/utils/config.py
index 2406a07..a33b825 100644
--- a/packages/shared/fastfetchbot_shared/utils/config.py
+++ b/packages/shared/fastfetchbot_shared/utils/config.py
@@ -52,5 +52,5 @@
     r"chatgpt\.com\/share\/[A-Za-z0-9]+",
     r"gemini\/share\/[A-Za-z0-9]+",
     r"t\.me\/[A-Za-z0-9]+",
-    r"github.com\/[A-Za-z0-9_-]+\/[A-Za-z0-9_-]+",
+    r"github\.com\/[A-Za-z0-9_-]+\/[A-Za-z0-9_-]+",
 ]

From 5e248bc4c8ecb816c39ad96d3be04e72ea9c9ee7 Mon Sep 17 00:00:00 2001
From: aturret <enturreopy@gmail.com>
Date: Mon, 23 Feb 2026 18:31:14 -0600
Subject: [PATCH 8/8] feat: update video fetch

---
 apps/api/src/config.py                        |  3 +-
 .../services/scrapers/xiaohongshu/adaptar.py  | 71 +++++++++++++------
 .../shared/fastfetchbot_shared/utils/parse.py |  2 +-
 3 files changed, 51 insertions(+), 25 deletions(-)

diff --git a/apps/api/src/config.py b/apps/api/src/config.py
index dfad573..4da6b3b 100644
--- a/apps/api/src/config.py
+++ b/apps/api/src/config.py
@@ -6,6 +6,7 @@
 import gettext
 import secrets
 
+from fastfetchbot_shared.utils.logger import logger
 from fastfetchbot_shared.utils.parse import get_env_bool
 
 env = os.environ
@@ -102,7 +103,7 @@
         with open(xhs_cookie_path, "r", encoding="utf-8") as f:
             XHS_COOKIE_STRING = f.read().strip()
     except (IOError, OSError) as e:
-        print(f"Error reading XHS cookie file: {e}")
+        logger.error(f"Error reading XHS cookie file: {e}")
         XHS_COOKIE_STRING = ""
 else:
     # Fallback: build cookie string from individual env vars (backward compat)
diff --git a/apps/api/src/services/scrapers/xiaohongshu/adaptar.py b/apps/api/src/services/scrapers/xiaohongshu/adaptar.py
index 598d085..a3f53ae 100644
--- a/apps/api/src/services/scrapers/xiaohongshu/adaptar.py
+++ b/apps/api/src/services/scrapers/xiaohongshu/adaptar.py
@@ -33,6 +33,9 @@ def parse_xhs_note_url(note_url: str) -> Dict[str, str]:
     }
 
 def get_pure_url(url: str) -> str:
+    """
+    Get the pure URL without query parameters or fragment.
+    """
     parsed = urlparse(url)
     return parsed.scheme + "://" + parsed.netloc + parsed.path
 
@@ -48,6 +51,11 @@ def __init__(
     ):
         self.cookies = cookies.strip()
         self.sign_server_endpoint = (sign_server_endpoint or SIGN_SERVER_URL).rstrip("/")
+        if not self.sign_server_endpoint:
+            raise ValueError(
+                "XhsSinglePostAdapter requires a sign server URL. "
+                "Set SIGN_SERVER_URL in the environment or pass sign_server_endpoint explicitly."
+            )
         self.timeout = timeout
         self._http = httpx.AsyncClient(timeout=timeout, follow_redirects=True)
 
@@ -371,17 +379,19 @@ def _to_int(value: Any) -> int:
             except (TypeError, ValueError):
                 return 0
 
+        note_type = str(_pick(note_item, "type", default=""))
         user = _pick(note_item, "user", default={}) or {}
         interact = _pick(note_item, "interact_info", "interactInfo", default={}) or {}
         image_list = []
-        for item in _pick(note_item, "image_list", "imageList", default=[]) or []:
-            url = (
-                _pick(item, "url_default", "urlDefault", "url")
-                if isinstance(item, dict)
-                else None
-            )
-            if url:
-                image_list.append(str(url))
+        if note_type != "video":
+            for item in _pick(note_item, "image_list", "imageList", default=[]) or []:
+                url = (
+                    _pick(item, "url_default", "urlDefault", "url")
+                    if isinstance(item, dict)
+                    else None
+                )
+                if url:
+                    image_list.append(str(url))
 
         tag_list = []
         for tag in _pick(note_item, "tag_list", "tagList", default=[]) or []:
@@ -391,6 +401,9 @@ def _to_int(value: Any) -> int:
                 tag_list.append(str(tag["name"]))
 
         video_urls: List[str] = self._extract_video_urls(note_item)
+        if not video_urls and isinstance(note_item.get("note"), dict):
+            # Some payloads wrap the note body under `note`.
+            video_urls = self._extract_video_urls(note_item["note"])
         note_id = str(_pick(note_item, "note_id", "noteId", default=""))
         xsec_token = str(_pick(note_item, "xsec_token", "xsecToken", default=""))
         xsec_source = str(
@@ -404,7 +417,7 @@ def _to_int(value: Any) -> int:
 
         return {
             "note_id": note_id,
-            "type": str(_pick(note_item, "type", default="")),
+            "type": note_type,
             "title": str(
                 _pick(note_item, "title", default="")
                 or str(_pick(note_item, "desc", default=""))[:255]
@@ -437,26 +450,38 @@ def _to_int(value: Any) -> int:
 
     @staticmethod
     def _extract_video_urls(note_item: Dict[str, Any]) -> List[str]:
-        if note_item.get("type") != "video":
-            return []
+        note_type = str(note_item.get("type", "") or "")
         video = note_item.get("video", {}) or {}
+        if note_type != "video" and not isinstance(video, dict):
+            return []
+
         consumer = video.get("consumer", {}) or {}
         origin_video_key = consumer.get("origin_video_key") or consumer.get("originVideoKey")
         if origin_video_key:
             return [f"http://sns-video-bd.xhscdn.com/{origin_video_key}"]
 
-        urls = []
-        h264_list = (
-                (video.get("media", {}) or {})
-                .get("stream", {})
-                .get("h264", [])
-                or []
-        )
-        for item in h264_list:
-            master_url = item.get("master_url")
-            if master_url:
-                urls.append(master_url)
-        return urls
+        urls: List[str] = []
+        stream = (video.get("media", {}) or {}).get("stream", {}) or {}
+        # Prefer broadly compatible streams first, then fallback to newer codecs.
+        for stream_key in ("h264", "h265", "av1", "h266"):
+            for item in stream.get(stream_key, []) or []:
+                if not isinstance(item, dict):
+                    continue
+                master_url = item.get("master_url") or item.get("masterUrl")
+                if master_url:
+                    urls.append(str(master_url))
+                for backup_url in item.get("backup_urls", []) or item.get("backupUrls", []) or []:
+                    if backup_url:
+                        urls.append(str(backup_url))
+
+        # Keep ordering while deduplicating.
+        seen = set()
+        deduped: List[str] = []
+        for url in urls:
+            if url not in seen:
+                seen.add(url)
+                deduped.append(url)
+        return deduped
 
     def _normalize_comment(
             self,
diff --git a/packages/shared/fastfetchbot_shared/utils/parse.py b/packages/shared/fastfetchbot_shared/utils/parse.py
index 75076b7..61527af 100644
--- a/packages/shared/fastfetchbot_shared/utils/parse.py
+++ b/packages/shared/fastfetchbot_shared/utils/parse.py
@@ -84,7 +84,7 @@ async def get_url_metadata(url: str, ban_list: Optional[list] = None) -> UrlMeta
                     source = website
                     content_type = "video"
     # clear the url query
-    if source not in ["youtube", "bilibili", "wechat"]:
+    if source not in ["youtube", "bilibili", "wechat", "xiaohongshu"]:
         url = url_parser.scheme + "://" + url_parser.netloc + url_parser.path
     if source in ban_list:
         source = "banned"