source: gs3-extensions/structured-image/trunk/perllib/plugins/GoogleVisionAPIConverter.pm@ 36989

Last change on this file since 36989 was 36989, checked in by davidb, 17 months ago

Initial work on developing a PagedImagePlugin that uses Google Vision API to OCR pages as part of the ingest process

File size: 9.7 KB
Line 
1######################################################################
2#
3# GoogleVisionAPIConverter.pm -- helper plugin that allows other plugins
4# (such as ImagePlugin and PagedImagePlugin) to extend their
5# processing capability through sub-classing inheritance (such as
6# GoogleVisionImagePlugin and GoogleVisionPagedImagePlugin) to
7# expand the image processing capabilities at ingest time to
8# include the Google Vision API allowing for: metadata labelling
9# of objects within a scene; and OCR text recognition.
10#
11# A component of the Greenstone digital library software
12# from the New Zealand Digital Library Project at the
13# University of Waikato, New Zealand.
14#
15# Copyright (C) 1999 New Zealand Digital Library Project
16#
17# This program is free software; you can redistribute it and/or modify
18# it under the terms of the GNU General Public License as published by
19# the Free Software Foundation; either version 2 of the License, or
20# (at your option) any later version.
21#
22# This program is distributed in the hope that it will be useful,
23# but WITHOUT ANY WARRANTY; without even the implied warranty of
24# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25# GNU General Public License for more details.
26#
27# You should have received a copy of the GNU General Public License
28# along with this program; if not, write to the Free Software
29# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
30#
31###########################################################################
32
33package GoogleVisionAPIConverter;
34
35use strict;
36no strict 'refs'; # allow filehandles to be variables and viceversa
37no strict 'subs';
38
39use gsprintf;
40use FileUtils;
41
42##use ImagePlugin;
43use BaseMediaConverter;
44
45use utf8;
46use JSON qw( from_json );
47
# Set up inheritance at compile time: GoogleVisionAPIConverter derives
# from BaseMediaConverter.
BEGIN {
    @GoogleVisionAPIConverter::ISA = qw(BaseMediaConverter);
}
51
# Flags this converter contributes to a plugin's argument list.  All three
# are optional on/off switches.  Each 'desc' value is a brace-wrapped key
# rather than literal help text (presumably looked up elsewhere -- confirm).
my $arguments = [
    {   name => 'enable_image_labelling',
        desc => '{GoogleVisionAPIConverter.enable_image_labelling}',
        type => 'flag',
        reqd => 'no',
    },
    {   name => 'enable_image_ocr',
        desc => '{GoogleVisionAPIConverter.enable_image_ocr}',
        type => 'flag',
        reqd => 'no',
    },
    {   name => 'enable_document_ocr',
        desc => '{GoogleVisionAPIConverter.enable_document_ocr}',
        type => 'flag',
        reqd => 'no',
    },
];
66
# Option block describing this converter; 'args' points at the flag table
# defined above so both are registered together in new().
my $options = {
    name     => 'GoogleVisionAPIConverter',
    desc     => '{GoogleVisionAPIConverter.desc}',
    abstract => 'no',
    inherits => 'yes',
    args     => $arguments,
};
72
# Constructor.
#
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - handed unchanged to the BaseMediaConverter constructor
#   $hashArgOptLists - hash ref; this converter's flag table is appended to
#                      its "ArgList" entry and its option block to "OptList"
#
# Returns the object built by BaseMediaConverter, re-blessed into $class.
sub new {
    my ($class, $pluginlist, $inputargs, $hashArgOptLists) = @_;

    push(@$pluginlist, $class);

    push(@{ $hashArgOptLists->{"ArgList"} }, @{$arguments});
    push(@{ $hashArgOptLists->{"OptList"} }, $options);

    # Explicit Class->new(...) rather than the indirect-object form
    # "new Class(...)", whose parsing depends on which names happen to be
    # in scope.  The trailing 1 is passed exactly as before; its meaning is
    # defined by BaseMediaConverter (NOTE(review): confirm against that class).
    my $self = BaseMediaConverter->new($pluginlist, $inputargs, $hashArgOptLists, 1);

    return bless $self, $class;
}
85
# Start-of-run hook.
#
# Rejects the combination of enable_image_ocr and enable_document_ocr:
# when both flags are set a usage summary is written to STDERR and the
# process exits with status 2.  Otherwise the remaining arguments
# (in order: $pluginfo, $base_dir, $processor, $maxdocs) are forwarded
# untouched to the parent class's begin().
sub begin {
    my $self = shift(@_);

    my $both_ocr_modes = $self->{'enable_image_ocr'} && $self->{'enable_document_ocr'};

    if ($both_ocr_modes) {
        my @usage_lines = (
            "Please use the following command syntax for vision types: (--enable_image_ocr | --enable_document_ocr) [--enable_image_labelling]\n",
            "\t\t --enable_image_ocr : optical character recognition for text within images\n",
            "\t\t --enable_document_ocr : optical character recognition for text within documents\n",
            "\t\t --enable_image_labelling : annotation labeling for objects within images\n",
        );
        foreach my $usage_line (@usage_lines) {
            print STDERR $usage_line;
        }
        exit(2);
    }

    $self->SUPER::begin(@_);
}
100
# Classify one line of output from the vision command.
#
#   $line - the text of the line
#
# Returns the two-element list ($had_error, $generate_dot).  $had_error is
# always 0: no error detection is attempted.  $generate_dot is 1 when the
# pattern below matches, i.e. for any text that contains no newline other
# than an optional trailing one, and 0 otherwise.
sub vision_monitor_line {
    my ($line) = @_;

    my $had_error    = 0;
    my $generate_dot = ($line =~ m/^.*$/) ? 1 : 0;

    return ($had_error, $generate_dot);
}
114
# Run the Google Vision step(s) selected by the enable_* flags on one file.
#
#   $filename - path of the source file; used to initialise the cache and
#               quoted into the vision.py command line
#   $file     - passed on to run_vision()
#   $doc_obj  - document object; its top section is passed on to run_vision()
#
# With one flag set the JSON output goes to "google-vision-output.json" in
# the cache directory; with two flags each run writes to
# "<flag>-google-vision-output.json".  Any other number of enabled flags
# results in no vision run at all (begin() already rejects both OCR flags
# together).  Always returns the string "json".
#
# NOTE(review): the credentials file name is hard-coded, and the command is
# assembled by wrapping paths in double quotes only.
sub run_convert {
    my ($self, $filename, $file, $doc_obj) = @_;

    my $section = $doc_obj->get_top_section();

    my $outhandle = $self->{'outhandle'};
    print {$outhandle} "----- GoogleVisionAPIConveter run_convert -----\n";

    # Enabled vision types, in the fixed order labelling, image OCR, document OCR.
    my @vision_type = grep { $self->{$_} }
        qw(enable_image_labelling enable_image_ocr enable_document_ocr);
    my $type_count = scalar(@vision_type);

    if ($type_count != 0) {
        $self->init_cache_for_file($filename);
        my $cached_image_dir = $self->{'cached_dir'};

        my $ofile = "google-vision-output.json";
        my $credentials_filename = &FileUtils::filenameConcatenate(
            $ENV{'GSDLCOLLECTDIR'}, "etc", "atea-storage-cd63a39dfeb5.json");

        if ($type_count == 1 || $type_count == 2) {
            foreach my $type (@vision_type) {
                # A single run uses the plain output name; paired runs are
                # kept apart by prefixing the output name with the flag name.
                my $output_leaf = ($type_count == 1)
                    ? "google-vision-output.json"
                    : $type . "-google-vision-output.json";
                my $ofilename = &FileUtils::filenameConcatenate($cached_image_dir, $output_leaf);

                my $vision_cmd = qq{vision.py --$type --credentials "$credentials_filename" "$filename" "$ofilename"};

                $self->run_vision($file, $filename, $ofile, $ofilename, $vision_cmd, $type, $section, $doc_obj);
            }
        }
    }

    return "json";
}
169
# Run one vision.py command (through the converter's command cache) and fold
# its JSON output into the document.
#
#   $file        - file name shown in the progress message
#   $filename    - input file path handed to run_cached_general_cmd()
#   $ofile       - suffix of the name the JSON file is associated under
#                  (the associated name is $vision_type . $ofile)
#   $ofilename   - path of the JSON file the command writes; read back here
#   $vision_cmd  - complete command line to execute
#   $vision_type - "enable_image_ocr", "enable_document_ocr" or
#                  "enable_image_labelling"; selects how the JSON is used
#   $section     - section the JSON file is associated with
#   $doc_obj     - document object receiving text and metadata
#
# OCR types: the full text (textAnnotations[0].description) is appended to
# the top section, and the words of the first page are grouped by detected
# language into metadata named "z_<languageCode>" ("z_no_lang" when a word
# carries no language).
# Labelling: every label is written both as generic metadata
# (description / score / topicality / mid) and as per-label metadata
# (descriptions, "<description>_score", "<description>_topicality",
# "<description>_mid").
#
# Dies if the JSON file cannot be opened; from_json() may also die on
# malformed JSON.
sub run_vision {
    # Not used in this sub; retained because "use" is a compile-time import
    # that affects the whole package, not just this scope.
    use Data::Dumper;

    my $self = shift(@_);
    my ($file, $filename, $ofile, $ofilename, $vision_cmd, $vision_type, $section, $doc_obj) = @_;

    my $print_info = { 'message_prefix' => "GoogleVisionAPI",
                       'message'        => "Sending $file to GoogleVisionAPI using vision.py" };

    # NOTE(review): the three returned values are not inspected; a failed
    # command currently only surfaces when the output file cannot be opened.
    my ($vision_regenerated, $vision_result, $vision_error)
        = $self->run_cached_general_cmd($vision_cmd, $filename, $ofilename, $print_info);

    $doc_obj->associate_file($ofilename, $vision_type . $ofile, "application/json", $section);

    # Slurp the whole JSON file as decoded UTF-8 characters.
    my $json_text = do {
        open(my $json_fh, "<:encoding(UTF-8)", $ofilename)
            or die("Can't open \"$ofilename\": $!\n");
        local $/;
        <$json_fh>
    };

    my $cursection = $doc_obj->get_top_section(); # target section for text and metadata

    my $decoded_json = from_json($json_text);

    if ($vision_type eq "enable_document_ocr" || $vision_type eq "enable_image_ocr") {
        # An image with no recognisable text yields a response without
        # textAnnotations; only append text that is actually present.
        my $annotations = $decoded_json->{textAnnotations} || [];
        my $ocr_text    = @{$annotations} ? $annotations->[0]{description} : undef;
        if (defined $ocr_text) {
            $doc_obj->add_utf8_text($cursection, $ocr_text);
        }

        # "|| []" / "|| {}" keep the traversal safe (and free of
        # autovivification) when any level of the structure is missing.
        my $pages      = ($decoded_json->{fullTextAnnotation} || {})->{pages} || [];
        my $first_page = @{$pages} ? $pages->[0] : {};
        my $blocks     = $first_page->{blocks} || [];

        my %text_and_language;

        foreach my $block (@{$blocks}) {
            foreach my $paragraph (@{ $block->{paragraphs} || [] }) {
                foreach my $word (@{ $paragraph->{words} || [] }) {
                    my $languages = ($word->{property} || {})->{detectedLanguages} || [];
                    my $detected_language
                        = (@{$languages} ? $languages->[0]{languageCode} : undef) || "no_lang";

                    # Rebuild the word from its individual symbols.
                    my $word_text = join("",
                        map { defined $_->{text} ? $_->{text} : "" }
                            @{ $word->{symbols} || [] });

                    $text_and_language{$detected_language} .= $word_text . " ";
                }
            }
        }

        # Sorted so the metadata is emitted in a reproducible order.
        foreach my $language (sort keys %text_and_language) {
            $doc_obj->add_utf8_metadata($cursection, "z_" . $language, $text_and_language{$language});
        }
    }
    elsif ($vision_type eq "enable_image_labelling") {
        my $labels = $decoded_json->{labelAnnotations} || [];

        foreach my $label (@{$labels}) {
            my $description = $label->{description};

            # The description is used to build metadata names, so a label
            # without one cannot be recorded meaningfully.
            next unless defined $description && length $description;

            # write to metadata : 'description'='Book' 'score'='0.9' 'topicality'='0.9' 'mid'='/m/0123'
            $doc_obj->add_utf8_metadata($cursection, "description", $description);
            $doc_obj->add_utf8_metadata($cursection, "score",       $label->{score});
            $doc_obj->add_utf8_metadata($cursection, "topicality",  $label->{topicality});
            $doc_obj->add_utf8_metadata($cursection, "mid",         $label->{mid});

            # write to metadata : 'descriptions'='Book' 'Book_score'='0.9' 'Book_topicality'='0.9' 'Book_mid'='/m/0123'
            $doc_obj->add_utf8_metadata($cursection, "descriptions", $description);
            $doc_obj->add_utf8_metadata($cursection, $description . "_score",      $label->{score});
            $doc_obj->add_utf8_metadata($cursection, $description . "_topicality", $label->{topicality});
            $doc_obj->add_utf8_metadata($cursection, $description . "_mid",        $label->{mid});
        }
    }
}
239
2401;
241
242
243
244
245
246
247
248
249
250
251
Note: See TracBrowser for help on using the repository browser.