diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..7ce3045 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,49 @@ +# Git +.git +.gitignore +.gitattributes + +# Python +__pycache__ +*.py[cod] +*$py.class +*.so +.Python +env/ +venv/ +ENV/ +*.egg-info/ +dist/ +build/ + +# IDE +.vscode +.idea +*.swp +*.swo +*~ + +# Documentation +*.md +LICENSE +screenshots/ + +# Docker +Dockerfile +docker-compose.yml +.dockerignore + +# CI/CD +.github/ + +# Tests +test_*.py +tests/ + +# OS +.DS_Store +Thumbs.db + +# Models (if large) +models/*.pth +models/*.onnx diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml new file mode 100644 index 0000000..4763421 --- /dev/null +++ b/.github/workflows/docker-publish.yml @@ -0,0 +1,27 @@ +name: Docker Build + +on: + push: + branches: [ master ] + workflow_dispatch: + +jobs: + docker: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Log in to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Build and push Docker image + uses: docker/build-push-action@v4 + with: + context: . 
+ push: true + tags: ${{ secrets.DOCKER_USERNAME }}/face-extraction-api:latest diff --git a/.github/workflows/python-build.yml b/.github/workflows/python-build.yml new file mode 100644 index 0000000..5a88205 --- /dev/null +++ b/.github/workflows/python-build.yml @@ -0,0 +1,31 @@ +name: Python Build + +on: + push: + branches: [ master, dev ] + pull_request: + branches: [ master ] + workflow_dispatch: + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Set up Python 3.9 + uses: actions/setup-python@v4 + with: + python-version: '3.9' + cache: 'pip' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + + - name: Verify installation + run: | + python -c "import fastapi; import paddleocr; print('Dependencies installed successfully')" diff --git a/Dockerfile b/Dockerfile index 3af06ad..02bc5ee 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,4 @@ -# 使用 Python 3.8 slim 基础镜像 -FROM python:3.8-slim-bullseye +FROM python:3.9-slim-bullseye # 暴露端口 EXPOSE 8000 @@ -23,22 +22,17 @@ RUN sed -i "s@http://deb.debian.org@http://mirrors.tuna.tsinghua.edu.cn@g" /etc/ apt-get clean && \ rm -rf /var/lib/apt/lists/* -# 换源并安装 Python 依赖 -RUN python3 -m pip install -i https://pypi.tuna.tsinghua.edu.cn/simple --upgrade pip && \ - pip3 config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && \ - pip3 install --no-cache-dir -r requirements.txt - -# 复制项目文件 -COPY . 
/app - -# 创建模型目录并解压模型文件 -RUN mkdir -p /root/.paddleocr/whl/cls/ && \ - mkdir -p /root/.paddleocr/whl/det/ch/ && \ - mkdir -p /root/.paddleocr/whl/rec/ch/ && \ - tar xf /app/pp-ocrv4/ch_ppocr_mobile_v2.0_cls_infer.tar -C /root/.paddleocr/whl/cls/ 2>/dev/null && \ - tar xf /app/pp-ocrv4/ch_PP-OCRv4_det_infer.tar -C /root/.paddleocr/whl/det/ch/ && \ - tar xf /app/pp-ocrv4/ch_PP-OCRv4_rec_infer.tar -C /root/.paddleocr/whl/rec/ch/ && \ - rm -rf /app/pp-ocrv4/*.tar +# apt-get换源并安装依赖(使用阿里云镜像) +RUN sed -i "s@http://deb.debian.org@http://mirrors.aliyun.com@g" /etc/apt/sources.list +RUN cat /etc/apt/sources.list +RUN apt-get update && apt-get install -y libgl1 libgomp1 libglib2.0-0 libsm6 libxrender1 libxext6 +# 清理apt-get缓存 +RUN apt-get clean && rm -rf /var/lib/apt/lists/* + +# pip换源并安装python依赖(使用阿里云镜像) +RUN python3 -m pip install -i https://mirrors.aliyun.com/pypi/simple/ --upgrade pip +RUN pip3 config set global.index-url https://mirrors.aliyun.com/pypi/simple/ +RUN pip3 install -r requirements.txt # 启动命令 CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--workers", "2", "--log-config", "./log_conf.yaml"] diff --git a/docker-compose.yml b/docker-compose.yml index 376c59a..65114da 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -5,10 +5,15 @@ services: PaddleOCR: build: . 
container_name: paddle_ocr_api # 自定义容器名 - image: paddleocrfastapi:latest # 第2步自定义的镜像名与标签 + image: paddleocrfastapi:2025 # 使用刚才构建的镜像 environment: - TZ=Asia/Hong_Kong - OCR_LANGUAGE=ch ports: - - "8000:8000" # 自定义服务暴露端口, 8000为FastAPI默认端口, 不做修改,只能改前面的8000,不要忘了引号 + - 8000:8000 # 自定义服务暴露端口, 8000为FastAPI默认端口, 不做修改 + volumes: + - paddleocr_models:/root/.paddlex # 持久化存储 OCR 模型 restart: unless-stopped +volumes: + paddleocr_models: + driver: local \ No newline at end of file diff --git a/requirements.in b/requirements.in index d02fda4..f764b48 100644 --- a/requirements.in +++ b/requirements.in @@ -4,4 +4,5 @@ paddleocr uvicorn python-multipart requests -numpy \ No newline at end of file +numpy +opencv-python \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 032c5db..a1e47db 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,22 +1,22 @@ # -# This file is autogenerated by pip-compile with python 3.8 +# This file is autogenerated by pip-compile with python 3.9 # To update, run: # # pip-compile # -fastapi==0.101.0 +fastapi==0.115.11 # via -r requirements.in -paddleocr==2.7.0.0 +paddleocr==3.3.2 # via -r requirements.in -paddlepaddle==2.5.1 +paddlepaddle==3.2.0 # via -r requirements.in -python-multipart==0.0.6 +python-multipart==0.0.20 # via -r requirements.in -uvicorn==0.23.2 +uvicorn==0.33.0 # via -r requirements.in -requests==2.31.0 +requests==2.32.3 # via -r requirements.in -numpy==1.23.5 +numpy==1.24.4 # via -r requirements.in pyyaml diff --git a/routers/ocr.py b/routers/ocr.py index be07b1a..0776de2 100644 --- a/routers/ocr.py +++ b/routers/ocr.py @@ -7,64 +7,161 @@ from utils.ImageHelper import base64_to_ndarray, bytes_to_ndarray import requests import os +import tempfile +import numpy as np OCR_LANGUAGE = os.environ.get("OCR_LANGUAGE", "ch") router = APIRouter(prefix="/ocr", tags=["OCR"]) -ocr = PaddleOCR(use_angle_cls=True, lang=OCR_LANGUAGE) +ocr = PaddleOCR( + text_detection_model_name="PP-OCRv5_mobile_det", + 
def _np_to_list(value):
    """Convert NumPy data into plain Python containers; return anything else unchanged.

    Arrays become (nested) lists and NumPy scalars become Python scalars.
    Lists and tuples are walked recursively so that arrays nested inside
    them are converted too; the container type (list / tuple) is kept.
    """
    if isinstance(value, np.ndarray):
        return value.tolist()
    if isinstance(value, np.generic):
        return value.item()
    if isinstance(value, list):
        return [_np_to_list(element) for element in value]
    if isinstance(value, tuple):
        return tuple(_np_to_list(element) for element in value)
    return value


def _build_ocr_entry(input_path, rec_texts, rec_boxes):
    """Normalise one raw OCR record into the response dict shape."""
    # Only list/tuple text containers are kept; anything else (for example a
    # NumPy array) is dropped so it is never mistaken for recognised text.
    texts = list(rec_texts) if isinstance(rec_texts, (list, tuple)) else []
    boxes = [] if rec_boxes is None else _np_to_list(rec_boxes)
    return {
        'input_path': input_path,
        'rec_texts': texts,
        'rec_boxes': boxes
    }


def _entry_from_mapping(payload):
    """Build an entry from a dict, unwrapping an optional ``'res'`` level.

    Returns ``None`` when *payload* (or its ``'res'`` value) is not a dict.
    """
    if not isinstance(payload, dict):
        return None
    core = payload.get('res', payload)
    if not isinstance(core, dict):
        return None
    return _build_ocr_entry(
        core.get('input_path', ''),
        core.get('rec_texts'),
        core.get('rec_boxes'),
    )


def _entry_from_object(item):
    """Build an entry from an attribute-style result object.

    Returns ``None`` when the object carries no text, no boxes and no path.
    """
    entry = _build_ocr_entry(
        getattr(item, 'input_path', ''),
        getattr(item, 'rec_texts', None),
        getattr(item, 'rec_boxes', None),
    )
    # Emptiness is tested AFTER the NumPy -> list conversion: asking a
    # multi-element ndarray for its truth value raises ValueError.
    entry['rec_boxes'] = entry['rec_boxes'] or []
    if entry['rec_texts'] or entry['rec_boxes'] or entry['input_path']:
        return entry
    return None


def extract_ocr_data(result):
    """Extract ``input_path`` / ``rec_texts`` / ``rec_boxes`` from an OCR result.

    Accepted shapes of *result*:

    1. ``{'res': {...}}``                  - a single wrapped result
    2. ``[{'res': {...}}, ...]``           - several results (e.g. pages)
    3. a list/tuple of objects exposing ``input_path`` / ``rec_texts`` /
       ``rec_boxes`` as attributes
    4. a plain dict holding those keys directly

    Returns:
        A non-empty list of dicts ``{'input_path', 'rec_texts', 'rec_boxes'}``.
        ``rec_texts`` is always a list; NumPy arrays in ``rec_boxes`` are
        converted to plain lists. When nothing can be extracted, a single
        record with empty values is returned so callers always receive the
        same structure.

    Setting the environment variable ``OCR_DEBUG=1`` prints the type of an
    unrecognised *result*.
    """
    if isinstance(result, (list, tuple)):
        entries = []
        for item in result:
            if isinstance(item, dict):
                entry = _entry_from_mapping(item)
            else:
                entry = _entry_from_object(item)
            if entry:
                entries.append(entry)
        if entries:
            return entries
    elif isinstance(result, dict):
        entry = _entry_from_mapping(result)
        if entry:
            return [entry]

    # Unknown or empty result: fall back to one empty record.
    if os.environ.get("OCR_DEBUG", "0") == "1":
        print(f"[extract_ocr_data] 未识别的结果类型: {type(result)}")
    return [{'input_path': '', 'rec_texts': [], 'rec_boxes': []}]
@router.get('/predict-by-url', response_model=RestfulModel, summary="识别图片 URL")
async def predict_by_url(imageUrl: str):
    """Run OCR on an image referenced by an http(s) URL.

    Args:
        imageUrl: Absolute ``http://`` or ``https://`` URL of the image.

    Returns:
        A ``RestfulModel`` with ``resultcode=200`` whose ``data`` is the
        output of ``extract_ocr_data`` for the prediction.

    Raises:
        HTTPException: 400 when ``imageUrl`` is not a well-formed http(s) URL.
    """
    # Function-scope imports keep this block self-contained.
    import asyncio
    from urllib.parse import urlsplit

    # The value is handed to the same ``ocr.predict`` call that
    # ``predict_by_path`` uses for local files, so anything that is not an
    # http(s) URL must be rejected here to keep this endpoint URL-only.
    try:
        parts = urlsplit(imageUrl)
    except ValueError:
        parts = None
    if parts is None or parts.scheme not in ("http", "https") or not parts.netloc:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="请提供有效的 http/https 图片 URL"
        )

    # ``predict`` is a blocking call; running it in a worker thread keeps the
    # event loop free to serve other requests while this one is processed.
    result = await asyncio.to_thread(ocr.predict, input=imageUrl)
    # Keep only the key fields: input_path, rec_texts, rec_boxes.
    result_data = extract_ocr_data(result)
    restfulModel = RestfulModel(
        resultcode=200, message="Success", data=result_data)
    return restfulModel
if __name__ == "__main__":
    # Script entry point: run the initialisation check and report the result
    # both on stdout and through the process exit status.
    separator = "=" * 60
    print(separator)
    print("测试 PaddleOCR 3.3.2 版本")
    print(separator)

    if test_paddleocr_init():
        print("\n✓ 测试通过!PaddleOCR 3.3.2 可以正常使用")
    else:
        print("\n✗ 测试失败!")
        # Exit non-zero so shells and CI jobs can detect the failure;
        # previously the script always exited with status 0.
        raise SystemExit(1)