Ptesa

pip install pytesseract pillow opencv-python

import pytesseract
from PIL import Image
import cv2
import json
import os

# Optional: If Tesseract is not in PATH (Windows example)
# pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

def ocr_to_json(image_path, output_json="output.json"):
    """Run word-level Tesseract OCR on an image and save the result as JSON.

    The image is loaded with OpenCV, converted to grayscale and binarised
    with a fixed threshold of 150 before being passed to
    ``pytesseract.image_to_data``.

    Args:
        image_path: Path of the image file to read.
        output_json: Path of the JSON file to write (default "output.json").

    Returns:
        A dict with the keys "file_name" (base name of ``image_path``),
        "total_words" and "extracted_data"; each entry of "extracted_data"
        holds the word text, the confidence reported by Tesseract and the
        word's bounding box.

    Raises:
        ValueError: If OpenCV cannot load ``image_path``.
    """
    image = cv2.imread(image_path)
    # cv2.imread signals failure by returning None rather than raising, so
    # fail here with a clear message instead of inside cvtColor.
    if image is None:
        raise ValueError("Could not read image: {!r}".format(image_path))

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Fixed global threshold: pixels above 150 become white, the rest black.
    _, thresh = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY)

    # Word-level OCR data returned as a dict of parallel lists.
    data = pytesseract.image_to_data(thresh, output_type=pytesseract.Output.DICT)

    results = []
    rows = zip(
        data['text'], data['conf'],
        data['left'], data['top'], data['width'], data['height'],
    )
    for raw_text, conf, left, top, width, height in rows:
        word = raw_text.strip()
        if not word:
            # Skip entries whose text is empty or only whitespace.
            continue
        results.append({
            "text": word,
            "confidence": conf,
            "bounding_box": {
                "x": left,
                "y": top,
                "width": width,
                "height": height
            }
        })

    final_output = {
        "file_name": os.path.basename(image_path),
        "total_words": len(results),
        "extracted_data": results
    }

    # The file is opened as UTF-8, so write non-ASCII text as-is instead of
    # as \uXXXX escapes; the parsed JSON is identical either way.
    with open(output_json, "w", encoding="utf-8") as f:
        json.dump(final_output, f, indent=4, ensure_ascii=False)

    return final_output


# Example usage
if __name__ == "__main__":
    # Run OCR on a sample image; ocr_to_json also writes its default
    # "output.json" file as a side effect.
    result = ocr_to_json("sample_image.jpg")
    # Echo the returned structure to stdout as pretty-printed JSON.
    print(json.dumps(result, indent=4))

{
    "file_name": "sample_image.jpg",
    "total_words": 3,
    "extracted_data": [
        {
            "text": "Invoice",
            "confidence": "96",
            "bounding_box": {
                "x": 100,
                "y": 50,
                "width": 120,
                "height": 30
            }
        }
    ]
}

def simple_ocr_json(image_path):
    """Extract plain text from an image with Tesseract.

    Args:
        image_path: Path of the image file to open with Pillow.

    Returns:
        A dict with "file" (the path as given), "text" (the recognised text
        with surrounding whitespace stripped) and "lines" (that text split
        on newline characters).
    """
    # Use a context manager so the underlying file handle is closed as soon
    # as OCR is done instead of lingering until garbage collection.
    with Image.open(image_path) as img:
        text = pytesseract.image_to_string(img)

    stripped = text.strip()
    return {
        "file": image_path,
        "text": stripped,
        "lines": stripped.split("\n")
    }

# Illustrative fragment: `img` is not defined at this point and must already
# hold a loaded image. The `lang='eng+ben'` argument selects the combined
# English + Bengali language setting.
pytesseract.image_to_string(img, lang='eng+ben')

pip install pytesseract pillow opencv-python easyocr paddleocr flask fastapi uvicorn python-multipart

import pytesseract
import cv2
import json
from pytesseract import Output

def basic_ocr_to_json(image_path):
    """Run word-level Tesseract OCR on the grayscale version of an image.

    Args:
        image_path: Path of the image file to read with OpenCV.

    Returns:
        A dict with "total_words" and "words"; each word entry holds the
        text as reported by Tesseract, its confidence and a "box" with the
        keys "x", "y", "w" and "h".

    Raises:
        ValueError: If OpenCV cannot load ``image_path``.
    """
    img = cv2.imread(image_path)
    # cv2.imread returns None on failure instead of raising; surface that
    # as a clear error rather than an opaque failure inside cvtColor.
    if img is None:
        raise ValueError("Could not read image: {!r}".format(image_path))
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    data = pytesseract.image_to_data(gray, output_type=Output.DICT)

    rows = zip(
        data['text'], data['conf'],
        data['left'], data['top'], data['width'], data['height'],
    )
    # Keep only entries whose text is not empty/whitespace; the text itself
    # is stored unmodified.
    words = [
        {
            "text": text,
            "confidence": conf,
            "box": {
                "x": left,
                "y": top,
                "w": width,
                "h": height
            }
        }
        for text, conf, left, top, width, height in rows
        if text.strip() != ""
    ]

    return {"total_words": len(words), "words": words}

def advanced_preprocess(image_path):
    """OCR an image after blurring and adaptive thresholding.

    The image is converted to grayscale, smoothed with a 5x5 Gaussian blur
    and binarised with Gaussian adaptive thresholding (block size 11,
    constant 2) before being passed to ``pytesseract.image_to_string``.

    Args:
        image_path: Path of the image file to read with OpenCV.

    Returns:
        A dict ``{"clean_text": <recognised text>}``.

    Raises:
        ValueError: If OpenCV cannot load ``image_path``.
    """
    img = cv2.imread(image_path)
    # cv2.imread returns None on failure instead of raising; fail early
    # with a message that names the offending path.
    if img is None:
        raise ValueError("Could not read image: {!r}".format(image_path))

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (5, 5), 0)
    thresh = cv2.adaptiveThreshold(
        blur, 255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        11, 2
    )

    text = pytesseract.image_to_string(thresh)
    return {"clean_text": text}

def bengali_english_ocr(image_path):
    """Run Tesseract with the combined ``eng+ben`` language setting.

    Args:
        image_path: Path of the image file to read with OpenCV.

    Returns:
        A dict ``{"text": <recognised text>}``.

    Raises:
        ValueError: If OpenCV cannot load ``image_path``.
    """
    img = cv2.imread(image_path)
    # cv2.imread returns None on failure instead of raising.
    if img is None:
        raise ValueError("Could not read image: {!r}".format(image_path))

    # OpenCV loads pixels in BGR order while pytesseract assumes RGB for
    # array input, so swap the channels before handing the image over.
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    text = pytesseract.image_to_string(rgb, lang='eng+ben')
    return {"text": text}

import easyocr

def _to_builtin(value):
    """Recursively convert NumPy scalars/arrays inside *value* to built-ins."""
    if hasattr(value, "tolist"):
        # NumPy scalars and arrays both expose tolist(), which yields plain
        # Python ints/floats/lists.
        return value.tolist()
    if isinstance(value, (list, tuple)):
        return [_to_builtin(item) for item in value]
    return value


def easyocr_json(image_path):
    """Read text from an image with EasyOCR (English + Bengali).

    Args:
        image_path: Path of the image passed to ``Reader.readtext``.

    Returns:
        A dict ``{"results": [...]}`` where each entry holds "text",
        "confidence" and "bounding_box". Confidence and box coordinates are
        converted to built-in Python numbers/lists so the result can be
        passed to ``json.dumps`` even when the OCR backend hands back NumPy
        values.
    """
    # NOTE: a new Reader is constructed on every call.
    reader = easyocr.Reader(['en','bn'])
    results = reader.readtext(image_path)

    data = [
        {
            "text": text,
            "confidence": _to_builtin(conf),
            "bounding_box": _to_builtin(bbox)
        }
        for bbox, text, conf in results
    ]

    return {"results": data}

from paddleocr import PaddleOCR

def paddle_ocr_json(image_path):
    """Read text from an image with PaddleOCR (English model).

    Args:
        image_path: Path of the image passed to ``PaddleOCR.ocr``.

    Returns:
        A dict ``{"results": [...]}`` where each entry holds "text",
        "confidence" and "bounding_box", taken from the first element of
        the OCR result. The list is empty when that element is missing or
        empty.
    """
    ocr = PaddleOCR(lang='en')
    result = ocr.ocr(image_path)

    # NOTE(review): some PaddleOCR versions appear to return [None] when no
    # text is detected; treat a missing or empty first page as "no lines"
    # instead of failing while iterating over None.
    lines = result[0] if result and result[0] else []

    data = [
        {
            "text": line[1][0],
            "confidence": line[1][1],
            "bounding_box": line[0]
        }
        for line in lines
    ]

    return {"results": data}

from flask import Flask, request, jsonify
import os

app = Flask(__name__)

@app.route("/ocr", methods=["POST"])
def ocr_api():

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Ptesa #452

Optional: If Tesseract is not in PATH (Windows example)

pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

Example usage

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Ptesa #452

Description

Optional: If Tesseract is not in PATH (Windows example)

pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

Example usage

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions