# table-structure/cloud_api.py at master · scriptreiter/table-structure · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import json
import time
import os
import cv2
import base64
import httplib2

import dir_helper

from apiclient.discovery import build
from oauth2client.client import GoogleCredentials

def query_google_ocr(image_content):
  '''Send a single image to the Google Cloud Vision API for text detection.

  image_content is placed verbatim in the request's image 'content' field
  (get_cell_label passes a base64-encoded JPEG string).

  Returns whatever executing the images().annotate() request yields.
  '''

  discovery_url = 'https://vision.googleapis.com/$discovery/rest?version=v1'
  scopes = ['https://www.googleapis.com/auth/cloud-platform']

  # Authorize a fresh HTTP client with the application default credentials.
  http = httplib2.Http()
  default_credentials = GoogleCredentials.get_application_default()
  scoped_credentials = default_credentials.create_scoped(scopes)
  scoped_credentials.authorize(http)

  vision = build('vision', 'v1', http=http, discoveryServiceUrl=discovery_url)

  # One request for one image, asking only for TEXT_DETECTION.
  text_feature = {'type': 'TEXT_DETECTION', 'maxResults': 1}
  single_request = {
    'image': {'content': image_content},
    'features': [text_feature]
  }
  annotate_call = vision.images().annotate(body={'requests': [single_request]})

  return annotate_call.execute()

def get_labels(response, combine=False):
  '''Extract the detected text from a Vision API annotate response.

  Only the first entry of response['responses'] is inspected.

  With combine=True, returns the description of the first text annotation
  as one string, with newlines replaced by spaces and surrounding
  whitespace stripped ('' when there are no annotations).

  With combine=False, returns label_boxes() applied to every annotation
  after the first ([] when there are no annotations).
  '''
  detections = response['responses'][0].get('textAnnotations')

  # A missing key and an empty list both mean "no text found"; guarding
  # against the empty list avoids an IndexError on detections[0] below.
  if not detections:
    return '' if combine else []

  if combine:
    return detections[0]['description'].replace('\n', ' ').strip()

  return label_boxes(detections[1:])


def label_boxes(detections):
  '''Convert text detections into (x, y, width, height, description) tuples.

  Each detection must have a 'description' and a 'boundingPoly'. The
  bounding polygon may be either a dict holding a 'vertices' list of
  points or a bare list of points; each point is a dict with optional
  'x' and 'y' entries (a missing coordinate is treated as 0).

  Returns one tuple per detection describing the axis-aligned box that
  encloses the polygon, in the same order as the input.
  '''
  boxes = []
  for det in detections:
    poly = det['boundingPoly']
    # Accept the {'vertices': [...]} wrapper as well as a plain point list.
    vertices = poly['vertices'] if isinstance(poly, dict) else poly

    # Zero-valued coordinates may be left out of the JSON, so default to 0.
    xs = [vertex.get('x', 0) for vertex in vertices]
    ys = [vertex.get('y', 0) for vertex in vertices]

    min_x = min(xs)
    min_y = min(ys)  # was min(xs), which produced a wrong y and height

    boxes.append((min_x, min_y, max(xs) - min_x, max(ys) - min_y, det['description']))

  return boxes

def get_cell_label(cache_base, img_base, photo_file, box, zoom, sleep_delay):
  '''Return the OCR text for one cell of an image, using a JSON file cache.

  cache_base:  prefix of the cache file path.
  img_base:    prefix prepended to photo_file to locate the image.
  photo_file:  image file name; also part of the cache file name.
  box:         sequence whose first four items are (x, y, width, height).
  zoom:        factor the box coordinates are multiplied by before cropping.
  sleep_delay: seconds to sleep after each OCR query.

  If a cache file for this image and box exists, its stored response is
  used. Otherwise the cell is cropped from the image, JPEG-encoded, sent
  to query_google_ocr, and the response is cached when it contains a
  'responses' entry.

  Returns the combined text from get_labels, or '' when a fresh response
  has no 'responses' entry.

  Raises IOError if the image cannot be read.
  '''
  cache_path = cache_base + photo_file + '_' + '_'.join([str(x) for x in box[:4]]) + '.json'

  if os.path.isfile(cache_path):
    with open(cache_path, 'r') as cache_file:
      response = json.load(cache_file)
  else:
    img_path = img_base + photo_file
    img = cv2.imread(img_path)
    if img is None:
      # cv2.imread signals failure by returning None rather than raising.
      raise IOError('Could not read image: {}'.format(img_path))

    # Slice indices must be integers, so truncate in case zoom is a float.
    x1 = int(zoom * box[0])
    x2 = int(zoom * (box[0] + box[2]))
    y1 = int(zoom * box[1])
    y2 = int(zoom * (box[1] + box[3]))

    cell = img[y1:y2, x1:x2]

    retval, cell_buffer = cv2.imencode('.jpg', cell)

    image_content = base64.b64encode(cell_buffer).decode()

    response = query_google_ocr(image_content)

    # Pause after every remote query, whether or not it succeeded.
    time.sleep(sleep_delay)

    if 'responses' not in response:
      return ''

    dir_helper.ensure(cache_path)
    with open(cache_path, 'w') as cache_file:
      json.dump(response, cache_file)

  return get_labels(response, combine=True)

def add_labels(boxes, image_base, image_path, cache_path, zoom, sleep_delay):
  '''Attach OCR text to every box.

  For each box, get_cell_label is called with the given image and cache
  locations, and the result is returned as
  (box[0], box[1], box[2], box[3], [label]), in input order.
  '''
  def with_label(box):
    text = get_cell_label(cache_path, image_base, image_path, box, zoom, sleep_delay)
    return (box[0], box[1], box[2], box[3], [text])

  return [with_label(box) for box in boxes]