1 | /*
|
---|
2 |
|
---|
3 | https://stackoverflow.com/questions/68395710/building-a-bounding-box-surrounding-text-in-google-vision-api-to-extract-the-tex
|
---|
4 |
|
---|
def get_text_within(document, x1, y1, x2, y2):
    """Return the text of every symbol that lies fully inside a rectangle.

    Walks pages -> blocks -> paragraphs -> words -> symbols of *document* and
    keeps each symbol whose bounding box fits entirely within the axis-aligned
    rectangle (x1, y1)-(x2, y2), edges inclusive. Each kept symbol's text is
    followed by the whitespace that corresponds to its detected break, if any.

    Args:
        document: Object exposing ``pages`` with nested ``blocks``,
            ``paragraphs``, ``words`` and ``symbols`` (presumably a Google
            Vision ``full_text_annotation`` -- confirm against the caller).
        x1: Smallest x coordinate of the rectangle.
        y1: Smallest y coordinate of the rectangle.
        x2: Largest x coordinate of the rectangle.
        y2: Largest y coordinate of the rectangle.

    Returns:
        The collected text as a single string; empty when nothing matches.
    """
    # Numeric break type -> separator appended after the symbol.
    # Presumably Vision BreakType SPACE (1), SURE_SPACE (2),
    # EOL_SURE_SPACE (3) and LINE_BREAK (5) -- confirm against the API enum.
    separators = {1: ' ', 3: ' ', 2: '\t', 5: '\n'}

    # Collect pieces and join once instead of growing a string with +=.
    parts = []
    for page in document.pages:
        for block in page.blocks:
            for paragraph in block.paragraphs:
                for word in paragraph.words:
                    for symbol in word.symbols:
                        vertices = symbol.bounding_box.vertices
                        xs = [vertex.x for vertex in vertices]
                        ys = [vertex.y for vertex in vertices]
                        if not xs:
                            # A symbol without geometry cannot be placed
                            # inside the rectangle; skip it.
                            continue
                        inside = (
                            min(xs) >= x1 and max(xs) <= x2
                            and min(ys) >= y1 and max(ys) <= y2
                        )
                        if not inside:
                            continue
                        parts.append(symbol.text)
                        separator = separators.get(
                            symbol.property.detected_break.type)
                        if separator is not None:
                            parts.append(separator)
    return "".join(parts)
|
---|
30 |
|
---|
31 | */
|
---|
32 |
|
---|
33 |
|
---|
34 | /*
|
---|
35 |
|
---|
36 | https://stackoverflow.com/questions/57071788/google-vision-api-text-detection-display-words-by-block
|
---|
37 |
|
---|
38 |
|
---|
39 | https://gist.github.com/UBISOFT-1/f00e4d22790f4af378d70b237fa56ca9
|
---|
40 |
|
---|
# Run text detection on `image` and rebuild the recognised text one
# paragraph at a time. `client`, `image` and `vision` must already be
# defined by the surrounding code.
response = client.text_detection(image=image)
# The actual response for the first page of the input file.
# NOTE(review): `vision.enums` is believed to exist only in
# google-cloud-vision releases before 2.0 -- confirm the installed version.
breaks = vision.enums.TextAnnotation.DetectedBreak.BreakType
paragraphs = []  # one string per detected paragraph
lines = []  # every completed line, across all paragraphs
# extract text by block of detection
for page in response.full_text_annotation.pages:
    for block in page.blocks:
        for paragraph in block.paragraphs:
            para = ""
            line = ""
            # Print the paragraph's bounding box with the 'vertices ' label
            # stripped from its string form.
            suppose = str(paragraph.bounding_box)
            suppose = suppose.replace('vertices ','')
            print(suppose)
            for word in paragraph.words:
                for symbol in word.symbols:
                    line += symbol.text
                    break_type = symbol.property.detected_break.type
                    if break_type == breaks.SPACE:
                        line += ' '
                    elif break_type == breaks.EOL_SURE_SPACE:
                        # The line ends here; keep a trailing space so the
                        # next line does not fuse with this one in `para`.
                        line += ' '
                        lines.append(line)
                        para += line
                        line = ''
                    elif break_type == breaks.LINE_BREAK:
                        lines.append(line)
                        para += line
                        line = ''
            if line:
                # The paragraph ended without an end-of-line break; flush the
                # partial line so its text is not lost.
                lines.append(line)
                para += line
            paragraphs.append(para)

# This snippet was lifted from a function body and ended in a bare `return`,
# which is a SyntaxError at module level; bind the result to a name instead.
full_text = "\n".join(paragraphs)
|
---|
73 |
|
---|
74 |
|
---|
75 |
|
---|
76 |
|
---|
77 | https://blog.searce.com/tips-tricks-for-using-google-vision-api-for-text-detection-2d6d1e0c6361
|
---|
78 |
|
---|
def draw_boxes(image, bounds, color, width=5):
    """Outline each bounding box in *bounds* on *image* and return *image*.

    Args:
        image: Image handed to ``ImageDraw.Draw``.
        bounds: Iterable of objects whose ``vertices`` hold at least four
            points with ``x`` and ``y`` attributes.
        color: Value passed as ``fill`` to the line call.
        width: Value passed as ``width`` to the line call (default 5).

    Returns:
        The same ``image`` object that was passed in.
    """
    pen = ImageDraw.Draw(image)
    # Visit the four corners in order, then the first one again so the
    # outline is closed.
    corner_order = (0, 1, 2, 3, 0)
    for bound in bounds:
        outline = []
        for index in corner_order:
            corner = bound.vertices[index]
            outline.extend((corner.x, corner.y))
        pen.line(outline, fill=color, width=width)
    return image
|
---|
def get_document_bounds(response, feature):
    """Collect the bounding boxes of one structural level of an OCR response.

    Args:
        response: Annotation response exposing
            ``full_text_annotation.pages``.
        feature: The ``FeatureType`` member (BLOCK, PARA, WORD or SYMBOL)
            selecting which level's boxes to collect.

    Returns:
        A new list of ``bounding_box`` objects in traversal order; empty when
        *feature* matches none of the four levels.
    """
    # Derive the document from the argument and start from a fresh list, so
    # the function does not depend on (or mutate) module-level `document`
    # and `bounds` names.
    document = response.full_text_annotation
    bounds = []
    for page in document.pages:
        for block in page.blocks:
            if feature == FeatureType.BLOCK:
                bounds.append(block.bounding_box)
            for paragraph in block.paragraphs:
                if feature == FeatureType.PARA:
                    bounds.append(paragraph.bounding_box)
                for word in paragraph.words:
                    for symbol in word.symbols:
                        if feature == FeatureType.SYMBOL:
                            bounds.append(symbol.bounding_box)
                    if feature == FeatureType.WORD:
                        bounds.append(word.bounding_box)
    return bounds
|
---|
# Gather the bounding box of every word in the response, then outline each
# one in yellow on the image.
bounds = get_document_bounds(response, FeatureType.WORD)
draw_boxes(image, bounds, 'yellow')
|
---|
106 |
|
---|
107 | */
|
---|