Skip to content

Ptesa #452

@dkingfarhan88

Description

@dkingfarhan88

# Install the dependencies first (shell command): pip install pytesseract pillow opencv-python

import pytesseract
from PIL import Image
import cv2
import json
import os

# Optional: If Tesseract is not in PATH (Windows example)
# Only override pytesseract's default command when running on Windows AND the
# executable really exists at the default install location; otherwise keep
# pytesseract's default so the binary is still resolved through PATH.
WINDOWS_TESSERACT_CMD = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
if os.name == "nt" and os.path.isfile(WINDOWS_TESSERACT_CMD):
    pytesseract.pytesseract.tesseract_cmd = WINDOWS_TESSERACT_CMD

def ocr_to_json(image_path, output_json="output.json"):
    """Run word-level Tesseract OCR on an image and save the result as JSON.

    The image is loaded with OpenCV, converted to grayscale and binarized
    with a fixed threshold of 150 before being handed to
    ``pytesseract.image_to_data``.

    Args:
        image_path: Path of the image file to read.
        output_json: Path of the JSON file to write (UTF-8, 4-space indent).

    Returns:
        A dict with ``file_name`` (base name of ``image_path``),
        ``total_words`` and ``extracted_data``: one entry per non-blank
        word holding its stripped text, confidence and bounding box.

    Raises:
        ValueError: If OpenCV cannot read ``image_path``.
    """
    # Load image using OpenCV; imread reports failure by returning None
    # instead of raising, so check explicitly to give a clear error.
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(
            f"Could not read image (missing or unsupported file): {image_path}"
        )

    # Convert to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Optional: Apply thresholding to improve accuracy
    _, thresh = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY)

    # Extract OCR data (word-level with bounding boxes)
    data = pytesseract.image_to_data(thresh, output_type=pytesseract.Output.DICT)

    results = []
    columns = zip(
        data['text'], data['conf'],
        data['left'], data['top'], data['width'], data['height'],
    )
    for text, conf, left, top, width, height in columns:
        word = text.strip()
        if not word:
            # Skip entries whose text is empty or whitespace only.
            continue
        results.append({
            "text": word,
            "confidence": conf,
            "bounding_box": {
                "x": left,
                "y": top,
                "width": width,
                "height": height,
            },
        })

    final_output = {
        "file_name": os.path.basename(image_path),
        "total_words": len(results),
        "extracted_data": results,
    }

    with open(output_json, "w", encoding="utf-8") as f:
        json.dump(final_output, f, indent=4)

    return final_output

# Example usage
# Run the demo only when the file is executed as a script, not on import.
if __name__ == "__main__":
    result = ocr_to_json("sample_image.jpg")
    print(json.dumps(result, indent=4))

{
"file_name": "sample_image.jpg",
"total_words": 3,
"extracted_data": [
{
"text": "Invoice",
"confidence": "96",
"bounding_box": {
"x": 100,
"y": 50,
"width": 120,
"height": 30
}
}
]
}

def simple_ocr_json(image_path):
    """Return the plain OCR text of an image together with its lines.

    Args:
        image_path: Path of the image file, opened with Pillow.

    Returns:
        A dict with ``file`` (the path as given), ``text`` (the OCR output
        with surrounding whitespace stripped) and ``lines`` (``text`` split
        on newlines; an empty list when no text was recognized).
    """
    # The context manager closes the underlying file handle once OCR is done.
    with Image.open(image_path) as img:
        text = pytesseract.image_to_string(img).strip()

    return {
        "file": image_path,
        "text": text,
        # "".split("\n") would give [""], i.e. one bogus empty line.
        "lines": text.split("\n") if text else [],
    }

# Multi-language OCR: the '+'-joined lang value requests English and Bengali
# together. NOTE(review): `img` is not defined in this snippet; it presumably
# stands for an already-loaded image. Confirm before running.
pytesseract.image_to_string(img, lang='eng+ben')

# Install the full dependency set (shell command): pip install pytesseract pillow opencv-python easyocr paddleocr flask fastapi uvicorn python-multipart

import pytesseract
import cv2
import json
from pytesseract import Output

def basic_ocr_to_json(image_path):
    """Run word-level Tesseract OCR on the grayscale version of an image.

    Args:
        image_path: Path of the image file to read with OpenCV.

    Returns:
        A dict with ``total_words`` and ``words``: one entry per non-blank
        word holding its text, confidence and box (``x``, ``y``, ``w``, ``h``).

    Raises:
        ValueError: If OpenCV cannot read ``image_path``.
    """
    img = cv2.imread(image_path)
    # imread returns None on failure instead of raising.
    if img is None:
        raise ValueError(
            f"Could not read image (missing or unsupported file): {image_path}"
        )
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    data = pytesseract.image_to_data(gray, output_type=Output.DICT)

    columns = zip(
        data['text'], data['conf'],
        data['left'], data['top'], data['width'], data['height'],
    )
    words = [
        {
            "text": text,
            "confidence": conf,
            "box": {"x": left, "y": top, "w": width, "h": height},
        }
        for text, conf, left, top, width, height in columns
        if text.strip()  # drop entries whose text is empty or whitespace only
    ]

    return {"total_words": len(words), "words": words}

def advanced_preprocess(image_path):
    """OCR an image after blur and adaptive-threshold preprocessing.

    The image is converted to grayscale, smoothed with a 5x5 Gaussian blur
    and binarized with Gaussian adaptive thresholding (arguments 11 and 2)
    before being passed to ``pytesseract.image_to_string``.

    Args:
        image_path: Path of the image file to read with OpenCV.

    Returns:
        A dict with a single key ``clean_text`` holding the raw OCR text.

    Raises:
        ValueError: If OpenCV cannot read ``image_path``.
    """
    img = cv2.imread(image_path)
    # imread returns None on failure instead of raising.
    if img is None:
        raise ValueError(
            f"Could not read image (missing or unsupported file): {image_path}"
        )
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (5, 5), 0)
    thresh = cv2.adaptiveThreshold(
        blur, 255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        11, 2
    )

    text = pytesseract.image_to_string(thresh)
    return {"clean_text": text}

def bengali_english_ocr(image_path):
    """OCR an image using the combined ``eng+ben`` Tesseract languages.

    Args:
        image_path: Path of the image file to read with OpenCV.

    Returns:
        A dict with a single key ``text`` holding the raw OCR text.

    Raises:
        ValueError: If OpenCV cannot read ``image_path``.
    """
    img = cv2.imread(image_path)
    # imread returns None on failure; fail here with a clear message rather
    # than passing None on to pytesseract.
    if img is None:
        raise ValueError(
            f"Could not read image (missing or unsupported file): {image_path}"
        )
    text = pytesseract.image_to_string(img, lang='eng+ben')
    return {"text": text}

import easyocr

def _easyocr_plain(value):
    """Return ``value`` as a built-in Python scalar if it exposes ``item()``."""
    # NumPy scalars provide item(); plain ints/floats pass through unchanged.
    return value.item() if hasattr(value, "item") else value


def easyocr_json(image_path):
    """Run EasyOCR (English + Bengali) and return JSON-serializable results.

    Args:
        image_path: Path of the image to pass to ``Reader.readtext``.

    Returns:
        A dict ``{"results": [...]}`` with one entry per detection holding
        ``text``, ``confidence`` and ``bounding_box`` (a list of ``[x, y]``
        points).
    """
    reader = easyocr.Reader(['en', 'bn'])
    results = reader.readtext(image_path)

    data = []
    for bbox, text, conf in results:
        data.append({
            "text": text,
            # EasyOCR may hand back NumPy scalars, which json.dumps rejects;
            # convert them so the result really is JSON-ready.
            "confidence": _easyocr_plain(conf),
            "bounding_box": [
                [_easyocr_plain(coord) for coord in point] for point in bbox
            ],
        })

    return {"results": data}

from paddleocr import PaddleOCR

def paddle_ocr_json(image_path):
    """Run PaddleOCR (English) on an image and return the detections.

    Args:
        image_path: Path of the image to pass to ``PaddleOCR.ocr``.

    Returns:
        A dict ``{"results": [...]}`` with one entry per detected line
        holding ``text``, ``confidence`` and ``bounding_box``. The list is
        empty when nothing was detected.
    """
    ocr = PaddleOCR(lang='en')
    result = ocr.ocr(image_path)

    # Only the first entry of the result is used. It can be None/empty when
    # no text was found (NOTE(review): observed with PaddleOCR 2.x; confirm
    # for the installed version), which would otherwise break the loop.
    lines = result[0] if result and result[0] else []

    data = []
    for line in lines:
        data.append({
            "text": line[1][0],
            "confidence": line[1][1],
            "bounding_box": line[0],
        })

    return {"results": data}

from flask import Flask, request, jsonify
import os

# Flask expects the module's import name; the bare `name` was undefined
# (the double underscores were lost), so use __name__.
app = Flask(__name__)

@app.route("/ocr", methods=["POST"])
def ocr_api():

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions