diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..7ce3045
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,49 @@
+# Git
+.git
+.gitignore
+.gitattributes
+
+# Python
+__pycache__
+*.py[cod]
+*$py.class
+*.so
+.Python
+env/
+venv/
+ENV/
+*.egg-info/
+dist/
+build/
+
+# IDE
+.vscode
+.idea
+*.swp
+*.swo
+*~
+
+# Documentation
+*.md
+LICENSE
+screenshots/
+
+# Docker
+Dockerfile
+docker-compose.yml
+.dockerignore
+
+# CI/CD
+.github/
+
+# Tests
+test_*.py
+tests/
+
+# OS
+.DS_Store
+Thumbs.db
+
+# Models (if large)
+models/*.pth
+models/*.onnx
diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml
new file mode 100644
index 0000000..4763421
--- /dev/null
+++ b/.github/workflows/docker-publish.yml
@@ -0,0 +1,27 @@
+name: Docker Build
+
+on:
+  push:
+    branches: [ master ]
+  workflow_dispatch:
+
+jobs:
+  docker:  # builds the image from the repository root and pushes it to the registry
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+
+      - name: Log in to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}  # read from repository secrets
+          password: ${{ secrets.DOCKERHUB_TOKEN }}  # read from repository secrets
+
+      - name: Build and push Docker image
+        uses: docker/build-push-action@v4
+        with:
+          context: .  # build context is the repository root
+          push: true
+          tags: ${{ secrets.DOCKER_USERNAME }}/face-extraction-api:latest  # NOTE(review): named face-extraction-api here but paddleocrfastapi in docker-compose.yml - confirm
diff --git a/.github/workflows/python-build.yml b/.github/workflows/python-build.yml
new file mode 100644
index 0000000..5a88205
--- /dev/null
+++ b/.github/workflows/python-build.yml
@@ -0,0 +1,31 @@
+name: Python Build
+
+on:
+  push:
+    branches: [ master, dev ]
+  pull_request:
+    branches: [ master ]
+  workflow_dispatch:
+
+jobs:
+  build:  # installs requirements.txt and checks that the key packages can be imported
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+
+      - name: Set up Python 3.9
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.9'  # same minor version as the Dockerfile base image (python:3.9-slim-bullseye)
+          cache: 'pip'  # let setup-python cache pip downloads between runs
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+
+      - name: Verify installation  # import-only smoke check; no test suite is executed here
+        run: |
+          python -c "import fastapi; import paddleocr; print('Dependencies installed successfully')"
diff --git a/Dockerfile b/Dockerfile
index 3af06ad..02bc5ee 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,5 +1,4 @@
-# 使用 Python 3.8 slim 基础镜像
-FROM python:3.8-slim-bullseye
+FROM python:3.9-slim-bullseye
 
 # 暴露端口
 EXPOSE 8000
@@ -23,22 +22,17 @@ RUN sed -i "s@http://deb.debian.org@http://mirrors.tuna.tsinghua.edu.cn@g" /etc/
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
-# 换源并安装 Python 依赖
-RUN python3 -m pip install -i https://pypi.tuna.tsinghua.edu.cn/simple --upgrade pip && \
-    pip3 config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && \
-    pip3 install --no-cache-dir -r requirements.txt
-
-# 复制项目文件
-COPY . /app
-
-# 创建模型目录并解压模型文件
-RUN mkdir -p /root/.paddleocr/whl/cls/ && \
-    mkdir -p /root/.paddleocr/whl/det/ch/ && \
-    mkdir -p /root/.paddleocr/whl/rec/ch/ && \
-    tar xf /app/pp-ocrv4/ch_ppocr_mobile_v2.0_cls_infer.tar -C /root/.paddleocr/whl/cls/ 2>/dev/null && \
-    tar xf /app/pp-ocrv4/ch_PP-OCRv4_det_infer.tar -C /root/.paddleocr/whl/det/ch/ && \
-    tar xf /app/pp-ocrv4/ch_PP-OCRv4_rec_infer.tar -C /root/.paddleocr/whl/rec/ch/ && \
-    rm -rf /app/pp-ocrv4/*.tar
+# Switch apt to the Aliyun mirror, install runtime libraries and clean up in the same layer (a separate cleanup RUN cannot shrink an earlier layer).
+# NOTE(review): an earlier RUN already rewrites deb.debian.org to the TUNA mirror, so this sed may match nothing - confirm which mirror is intended.
+RUN sed -i "s@http://deb.debian.org@http://mirrors.aliyun.com@g" /etc/apt/sources.list && \
+    apt-get update && \
+    apt-get install -y libgl1 libgomp1 libglib2.0-0 libsm6 libxrender1 libxext6 && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+
+# Install Python dependencies from the Aliyun PyPI mirror; --no-cache-dir keeps pip's download cache out of the image.
+RUN python3 -m pip install --no-cache-dir -i https://mirrors.aliyun.com/pypi/simple/ --upgrade pip && \
+    pip3 config set global.index-url https://mirrors.aliyun.com/pypi/simple/ && \
+    pip3 install --no-cache-dir -r requirements.txt
 
 # 启动命令
 CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--workers", "2", "--log-config", "./log_conf.yaml"]
diff --git a/docker-compose.yml b/docker-compose.yml
index 376c59a..65114da 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -5,10 +5,15 @@ services:
   PaddleOCR:
     build: . 
     container_name: paddle_ocr_api # 自定义容器名
-    image: paddleocrfastapi:latest # 第2步自定义的镜像名与标签
+    image: paddleocrfastapi:2025 # 使用刚才构建的镜像
     environment:
       - TZ=Asia/Hong_Kong
       - OCR_LANGUAGE=ch
     ports:
-    - "8000:8000" # 自定义服务暴露端口, 8000为FastAPI默认端口, 不做修改,只能改前面的8000,不要忘了引号
+      - "8000:8000" # host:container mapping; the API listens on 8000, so change only the left-hand (host) port and keep the quotes
+    volumes:
+      - paddleocr_models:/root/.paddlex  # persist OCR models in a named volume
     restart: unless-stopped
+volumes:
+  paddleocr_models:
+    driver: local
\ No newline at end of file
diff --git a/requirements.in b/requirements.in
index d02fda4..f764b48 100644
--- a/requirements.in
+++ b/requirements.in
@@ -4,4 +4,5 @@ paddleocr
 uvicorn
 python-multipart
 requests
-numpy
\ No newline at end of file
+numpy
+opencv-python
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 032c5db..a1e47db 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,22 +1,22 @@
 #
-# This file is autogenerated by pip-compile with python 3.8
+# This file is autogenerated by pip-compile with python 3.9
 # To update, run:
 #
 #    pip-compile
 #
-fastapi==0.101.0
+fastapi==0.115.11
     # via -r requirements.in
-paddleocr==2.7.0.0
+paddleocr==3.3.2
     # via -r requirements.in
-paddlepaddle==2.5.1
+paddlepaddle==3.2.0
     # via -r requirements.in
-python-multipart==0.0.6
+python-multipart==0.0.20
     # via -r requirements.in
-uvicorn==0.23.2
+uvicorn==0.33.0
     # via -r requirements.in
-requests==2.31.0
+requests==2.32.3
     # via -r requirements.in
-numpy==1.23.5
+numpy==1.24.4
     # via -r requirements.in
 pyyaml
 
diff --git a/routers/ocr.py b/routers/ocr.py
index be07b1a..0776de2 100644
--- a/routers/ocr.py
+++ b/routers/ocr.py
@@ -7,64 +7,161 @@
 from utils.ImageHelper import base64_to_ndarray, bytes_to_ndarray
 import requests
 import os
+import tempfile
+import numpy as np
 
 OCR_LANGUAGE = os.environ.get("OCR_LANGUAGE", "ch")
 
 router = APIRouter(prefix="/ocr", tags=["OCR"])
 
-ocr = PaddleOCR(use_angle_cls=True, lang=OCR_LANGUAGE)
+ocr = PaddleOCR(  # shared engine: built once at import time and used by every route below
+    text_detection_model_name="PP-OCRv5_mobile_det",
+    text_recognition_model_name="PP-OCRv5_mobile_rec",
+    use_doc_orientation_classify=True,
+    use_doc_unwarping=True,
+    use_textline_orientation=False,
+    lang=OCR_LANGUAGE  # NOTE(review): PaddleOCR 3.x may ignore lang once model names are given - confirm OCR_LANGUAGE still takes effect
+)
+def _np_to_list(value):
+    """Return ``value.tolist()`` for a NumPy array; hand back anything else unchanged."""
+    if not isinstance(value, np.ndarray):
+        return value
+    return value.tolist()
+
+
+def extract_ocr_data(result):
+    """
+    Normalise a PaddleOCR ``predict`` result into a JSON-friendly list.
+
+    Always returns a non-empty list whose entries look like
+    ``{'input_path': str, 'rec_texts': list[str], 'rec_boxes': list}``.
+
+    Accepted shapes of ``result``:
+    1. ``{'res': {...}}`` - a single wrapped result
+    2. ``[{'res': {...}}, {'res': {...}}]`` - several results (e.g. multiple pages)
+    3. a list of objects exposing input_path / rec_texts / rec_boxes attributes
+    4. a bare dict carrying those keys directly
+
+    Any other shape yields one placeholder entry with empty fields.
+    """
+
+    debug = os.environ.get("OCR_DEBUG", "0") == "1"
+
+    def _build_entry(input_path, rec_texts, rec_boxes):
+        # Shared normalisation for the dict path and the attribute path.
+        # Test for None explicitly: `x or []` raises ValueError on multi-element NumPy arrays.
+        if rec_boxes is None:
+            rec_boxes = []
+        return {
+            'input_path': '' if input_path is None else input_path,
+            # Only a list/tuple counts as text, so a NumPy array is never mistaken for it.
+            'rec_texts': list(rec_texts) if isinstance(rec_texts, (list, tuple)) else [],
+            'rec_boxes': _np_to_list(rec_boxes),
+        }
+
+    def _extract_from_dict(d):
+        if not isinstance(d, dict):
+            return None
+        core = d.get('res', d)  # unwrap 'res' when present, otherwise use the dict itself
+        if not isinstance(core, dict):
+            return None
+        return _build_entry(
+            core.get('input_path', ''), core.get('rec_texts'), core.get('rec_boxes'))
+
+    def _extract_from_object(item):
+        entry = _build_entry(
+            getattr(item, 'input_path', ''),
+            getattr(item, 'rec_texts', None),
+            getattr(item, 'rec_boxes', None),
+        )
+        # Drop objects that carry none of the expected fields.
+        has_content = entry['rec_texts'] or entry['rec_boxes'] or entry['input_path']
+        return entry if has_content else None
+
+    extracted = []
+    # Case A: result is a list of dicts and/or attribute-style objects.
+    if isinstance(result, list):
+        for item in result:
+            extractor = _extract_from_dict if isinstance(item, dict) else _extract_from_object
+            data = extractor(item)
+            if data:
+                extracted.append(data)
+        if extracted:
+            return extracted
+
+    # Case B: result is a single dict.
+    if isinstance(result, dict):
+        data = _extract_from_dict(result)
+        if data:
+            return [data]
+
+    # Unknown shape: return an empty placeholder so the front end can handle it uniformly.
+    if debug:
+        print(f"[extract_ocr_data] 未识别的结果类型: {type(result)}")
+    return [{'input_path': '', 'rec_texts': [], 'rec_boxes': []}]
 
 
 @router.get('/predict-by-path', response_model=RestfulModel, summary="识别本地图片")
 def predict_by_path(image_path: str):
-    result = ocr.ocr(image_path, cls=True)
+    result = ocr.predict(input=image_path)
+    # Reduce the raw prediction to input_path, rec_texts and rec_boxes.
+    result_data = extract_ocr_data(result)
     restfulModel = RestfulModel(
-        resultcode=200, message="Success", data=result, cls=OCRModel)
+        resultcode=200, message="Success", data=result_data, cls=OCRModel)
     return restfulModel
 
 
 @router.post('/predict-by-base64', response_model=RestfulModel, summary="识别 Base64 数据")
 def predict_by_base64(base64model: Base64PostModel):
     img = base64_to_ndarray(base64model.base64_str)
-    result = ocr.ocr(img=img, cls=True)
+    import cv2  # local import: only this route needs OpenCV directly
+    # predict() is fed a file path here, so write the decoded image to a temporary PNG first.
+    with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp_file:
+        tmp_path = tmp_file.name  # reserve a name only; the handle is closed before the file is used
+    try:  # Windows cannot delete a file that is still open, hence the closed handle
+        cv2.imwrite(tmp_path, img)
+        result = ocr.predict(input=tmp_path)
+    finally:
+        os.unlink(tmp_path)  # remove the temp file even when writing or OCR raises
+    result_data = extract_ocr_data(result)  # keep only input_path, rec_texts and rec_boxes
     restfulModel = RestfulModel(
-        resultcode=200, message="Success", data=result, cls=OCRModel)
+        resultcode=200, message="Success", data=result_data, cls=OCRModel)
     return restfulModel
 
 
 @router.post('/predict-by-file', response_model=RestfulModel, summary="识别上传文件")
 async def predict_by_file(file: UploadFile):
     restfulModel: RestfulModel = RestfulModel()
-    if file.filename.endswith((".jpg", ".png")):  # 只处理常见格式图片
+    if (file.filename or "").lower().endswith((".jpg", ".png", ".jpeg", ".bmp", ".tiff")):  # case-insensitive; a missing filename is rejected
         restfulModel.resultcode = 200
         restfulModel.message = file.filename
         file_data = file.file
         file_bytes = file_data.read()
-        img = bytes_to_ndarray(file_bytes)
-        result = ocr.ocr(img=img, cls=True)
-        restfulModel.data = result
+        # predict() is fed a file path, so spool the uploaded bytes to a temporary file.
+        # NOTE(review): the suffix is always .png whatever the upload's real format;
+        # presumably the image reader goes by content - confirm before changing it.
+        with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp_file:
+            tmp_file.write(file_bytes)
+        try:  # leaving the with-block has flushed and closed the handle
+            result = ocr.predict(input=tmp_file.name)
+        finally:
+            os.unlink(tmp_file.name)  # remove the temp file even when OCR raises
+        # Keep only input_path, rec_texts and rec_boxes from the raw prediction.
+        restfulModel.data = extract_ocr_data(result)
     else:
         raise HTTPException(
             status_code=status.HTTP_400_BAD_REQUEST,
-            detail="请上传 .jpg 或 .png 格式图片"
+            detail="请上传支持的图片格式 (.jpg, .png, .jpeg, .bmp, .tiff)"
         )
     return restfulModel
 
 
 @router.get('/predict-by-url', response_model=RestfulModel, summary="识别图片 URL")
 async def predict_by_url(imageUrl: str):
-    restfulModel: RestfulModel = RestfulModel()
-    response = requests.get(imageUrl)
-    image_bytes = response.content
-    if image_bytes.startswith(b"\xff\xd8\xff") or image_bytes.startswith(b"\x89PNG\r\n\x1a\n"):  # 只处理常见格式图片 (jpg / png)
-        restfulModel.resultcode = 200
-        img = bytes_to_ndarray(image_bytes)
-        result = ocr.ocr(img=img, cls=True)
-        restfulModel.data = result
-        restfulModel.message = "Success"
-    else:
-        raise HTTPException(
-            status_code=status.HTTP_400_BAD_REQUEST,
-            detail="请上传 .jpg 或 .png 格式图片"
-        )
+    # Accept only http(s) URLs (the previous version fetched the URL with requests.get),
+    # so this route cannot be used to make predict() read server-local paths.
+    if not imageUrl.lower().startswith(("http://", "https://")):
+        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="imageUrl 必须以 http:// 或 https:// 开头")
+    result_data = extract_ocr_data(ocr.predict(input=imageUrl))  # keep only input_path, rec_texts, rec_boxes
+    restfulModel = RestfulModel(resultcode=200, message="Success", data=result_data)
     return restfulModel
diff --git a/test_paddleocr.py b/test_paddleocr.py
new file mode 100644
index 0000000..854f77f
--- /dev/null
+++ b/test_paddleocr.py
@@ -0,0 +1,34 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""测试 PaddleOCR 3.3.2 初始化和功能"""
+
+def test_paddleocr_init():
+    """Smoke-check that PaddleOCR can be imported and constructed.
+
+    Returns True on success; on any exception prints the error plus a
+    traceback and returns False instead of raising.
+    """
+    import traceback
+
+    try:
+        from paddleocr import PaddleOCR
+        print("正在初始化 PaddleOCR 3.3.2...")
+        # Construct with the new-style keyword; the instance itself is not needed afterwards.
+        PaddleOCR(use_textline_orientation=False, lang='ch')
+        print("✓ PaddleOCR 3.3.2 初始化成功")
+    except Exception as e:
+        print(f"✗ PaddleOCR 初始化失败: {e}")
+        traceback.print_exc()
+        return False
+    return True
+
+if __name__ == "__main__":
+    banner = "=" * 60
+    print(banner)
+    print("测试 PaddleOCR 3.3.2 版本")
+    print(banner)
+    if not test_paddleocr_init():
+        print("\n✗ 测试失败！")
+        raise SystemExit(1)  # non-zero exit status so shells and CI can detect the failure
+    print("\n✓ 测试通过！PaddleOCR 3.3.2 可以正常使用")