1 |
|
---|
2 | /*
|
---|
3 |
|
---|
4 | https://stackoverflow.com/questions/57071788/google-vision-api-text-detection-display-words-by-block
|
---|
5 |
|
---|
6 |
|
---|
7 | https://gist.github.com/UBISOFT-1/f00e4d22790f4af378d70b237fa56ca9
|
---|
8 |
|
---|
9 | response = client.text_detection(image=image)
|
---|
10 | # The actual response for the first page of the input file.
|
---|
11 | breaks = vision.enums.TextAnnotation.DetectedBreak.BreakType
|
---|
12 | paragraphs = []
|
---|
13 | lines = []
|
---|
14 | # extract text by block of detection
|
---|
15 | for page in response.full_text_annotation.pages:
|
---|
16 | for block in page.blocks:
|
---|
17 | for paragraph in block.paragraphs:
|
---|
18 | para = ""
|
---|
19 | line = ""
|
---|
20 | suppose = str(paragraph.bounding_box)
|
---|
21 | suppose = suppose.replace('vertices ','')
|
---|
22 | print(suppose)
|
---|
23 | for word in paragraph.words:
|
---|
24 | for symbol in word.symbols:
|
---|
25 | line += symbol.text
|
---|
26 | if symbol.property.detected_break.type == breaks.SPACE:
|
---|
27 | line += ' '
|
---|
28 | if symbol.property.detected_break.type == breaks.EOL_SURE_SPACE:
|
---|
29 | line += ' '
|
---|
30 | lines.append(line)
|
---|
31 | para += line
|
---|
32 | line = ''
|
---|
33 | if symbol.property.detected_break.type == breaks.LINE_BREAK:
|
---|
34 | lines.append(line)
|
---|
35 | para += line
|
---|
36 | line = ''
|
---|
37 | paragraphs.append(para)
|
---|
38 |
|
---|
39 |
|
---|
40 | return "\n".join(paragraphs)
|
---|
41 |
|
---|
42 |
|
---|
43 |
|
---|
44 |
|
---|
45 | https://blog.searce.com/tips-tricks-for-using-google-vision-api-for-text-detection-2d6d1e0c6361
|
---|
46 |
|
---|
def draw_boxes(image, bounds, color, width=5):
    """Outline every bounding polygon in ``bounds`` on ``image``.

    For each entry a single polyline is drawn through its first four
    vertices and back to the first one (0 -> 1 -> 2 -> 3 -> 0).

    Args:
        image: Object handed to ``ImageDraw.Draw`` and returned afterwards.
        bounds: Iterable of objects exposing ``vertices``, an indexable
            sequence of at least four points with ``x`` and ``y`` attributes.
        color: Forwarded as the ``fill`` argument of ``draw.line``.
        width: Forwarded as the ``width`` argument of ``draw.line``.

    Returns:
        The same ``image`` object that was passed in.
    """
    pen = ImageDraw.Draw(image)
    # Corner indices in drawing order; ending on 0 closes the outline.
    corner_order = (0, 1, 2, 3, 0)
    for box in bounds:
        corners = box.vertices
        outline = []
        for idx in corner_order:
            outline.append(corners[idx].x)
            outline.append(corners[idx].y)
        pen.line(outline, fill=color, width=width)
    return image
|
---|
def get_document_bounds(response, feature):
    """Collect the bounding boxes of one feature level from an OCR response.

    Walks ``response.full_text_annotation`` page by page, descending through
    blocks, paragraphs, words and symbols, and gathers the ``bounding_box``
    of every element at the level selected by ``feature``.

    Args:
        response: Object exposing ``full_text_annotation.pages``.
        feature: One of ``FeatureType.BLOCK``, ``FeatureType.PARA``,
            ``FeatureType.WORD`` or ``FeatureType.SYMBOL``; any other value
            matches nothing.

    Returns:
        A new list of the matching ``bounding_box`` objects in traversal
        order; empty when nothing matches.
    """
    # Both names are plain locals so the function does not depend on any
    # module-level `document` or `bounds` being defined by the caller.
    document = response.full_text_annotation
    bounds = []
    for page in document.pages:
        for block in page.blocks:
            if feature == FeatureType.BLOCK:
                bounds.append(block.bounding_box)
            for paragraph in block.paragraphs:
                if feature == FeatureType.PARA:
                    bounds.append(paragraph.bounding_box)
                for word in paragraph.words:
                    for symbol in word.symbols:
                        if feature == FeatureType.SYMBOL:
                            bounds.append(symbol.bounding_box)
                    # Tested once per word, after its symbols, so a word's
                    # box is recorded a single time.
                    if feature == FeatureType.WORD:
                        bounds.append(word.bounding_box)
    return bounds
|
---|
# Example usage: collect the word-level boxes from the response, then
# outline each of them on the image in yellow.
bounds = get_document_bounds(
    response,
    FeatureType.WORD,
)
draw_boxes(image, bounds, 'yellow')
|
---|
74 |
|
---|
75 | */
|
---|