source: gs3-extensions/structured-image/trunk/perllib/plugins/GoogleVisionAPIConverter.pm@ 37010

Last change on this file since 37010 was 37010, checked in by davidb, 17 months ago

Changes to make it easier to display OCR bounding-box info in PagedImage doc view

File size: 11.0 KB
Line 
1######################################################################
2#
3# GoogleVisionAPIConverter.pm -- helper plugin that allows other plugins
4# (such as ImagePlugin and PagedImagePlugin) to extend their
5# processing capability through sub-classing inheritance (such as
6# GoogleVisionImagePlugin and GoogleVisionPagedImagePlugin) to
7# expand the image processing capabilities at ingest time to
8# include the Google Vision API allowing for: metadata labelling
9# of objects within a scene; and OCR text recognition.
10#
11# A component of the Greenstone digital library software
12# from the New Zealand Digital Library Project at the
13# University of Waikato, New Zealand.
14#
15# Copyright (C) 1999 New Zealand Digital Library Project
16#
17# This program is free software; you can redistribute it and/or modify
18# it under the terms of the GNU General Public License as published by
19# the Free Software Foundation; either version 2 of the License, or
20# (at your option) any later version.
21#
22# This program is distributed in the hope that it will be useful,
23# but WITHOUT ANY WARRANTY; without even the implied warranty of
24# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25# GNU General Public License for more details.
26#
27# You should have received a copy of the GNU General Public License
28# along with this program; if not, write to the Free Software
29# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
30#
31###########################################################################
32
33package GoogleVisionAPIConverter;
34
35use strict;
36no strict 'refs'; # allow filehandles to be variables and viceversa
37no strict 'subs';
38
39use gsprintf;
40use FileUtils;
41
42##use ImagePlugin;
43use BaseMediaConverter;
44
45use utf8;
46use JSON qw( from_json );
47
# Establish the inheritance chain at compile time: this converter derives
# from BaseMediaConverter.
BEGIN {
    @GoogleVisionAPIConverter::ISA = qw(BaseMediaConverter);
}
51
# Command-line arguments this converter contributes to any plugin that
# inherits from it: the name of the credentials file, plus one flag per
# Google Vision processing type.
my $arguments = [
    {
        'name' => "google_application_credentials",
        # NOTE(review): "applicatio" looks like a typo for "application".
        # The string is a lookup key, so it is left untouched here; confirm
        # against the matching strings/properties entry before renaming.
        'desc' => "{GoogleVisionAPIConverter.google_applicatio_credentials}",
        'type' => "string",
        'reqd' => "no",
        'deft' => "google-sa-credentials-key.json",
    },
    {
        'name' => "enable_image_labelling",
        'desc' => "{GoogleVisionAPIConverter.enable_image_labelling}",
        'type' => "flag",
        'reqd' => "no",
    },
    {
        'name' => "enable_image_ocr",
        'desc' => "{GoogleVisionAPIConverter.enable_image_ocr}",
        'type' => "flag",
        'reqd' => "no",
    },
    {
        'name' => "enable_document_ocr",
        'desc' => "{GoogleVisionAPIConverter.enable_document_ocr}",
        'type' => "flag",
        'reqd' => "no",
    },
];

# Option record describing this converter; 'args' points at the argument
# list declared above.
my $options = {
    'name'     => "GoogleVisionAPIConverter",
    'desc'     => "{GoogleVisionAPIConverter.desc}",
    'abstract' => "no",
    'inherits' => "yes",
    'args'     => $arguments,
};
78
# Constructor.
#
# Parameters:
#   $class           - class name being instantiated (this class or a subclass)
#   $pluginlist      - array ref; $class is appended to it
#   $inputargs       - passed through unchanged to BaseMediaConverter's constructor
#   $hashArgOptLists - hash ref whose "ArgList" and "OptList" array refs are
#                      extended with this converter's arguments and options
#
# Returns the object built by BaseMediaConverter, re-blessed into $class.
sub new {
    my ($class, $pluginlist, $inputargs, $hashArgOptLists) = @_;

    push(@{$pluginlist}, $class);

    push(@{ $hashArgOptLists->{"ArgList"} }, @{$arguments});
    push(@{ $hashArgOptLists->{"OptList"} }, $options);

    # Explicit class-method call instead of the indirect-object form
    # ("new BaseMediaConverter(...)"), which Perl can mis-parse.
    # NOTE(review): the meaning of the trailing 1 is defined by
    # BaseMediaConverter::new, which is not visible here; it is passed
    # through exactly as before.
    my $self = BaseMediaConverter->new($pluginlist, $inputargs, $hashArgOptLists, 1);

    return bless $self, $class;
}
91
# Start-of-run hook.
#
# Rejects the combination of --enable_image_ocr and --enable_document_ocr
# by printing a usage message to STDERR and exiting with status 2.
# Otherwise it delegates to the parent class's begin() with the same
# arguments (conventionally $pluginfo, $base_dir, $processor, $maxdocs)
# and returns whatever that call returns.
sub begin {
    my $self = shift;

    my $both_ocr_modes = $self->{'enable_image_ocr'} && $self->{'enable_document_ocr'};

    # Normal case: nothing to complain about, hand over to the parent.
    return $self->SUPER::begin(@_) unless $both_ocr_modes;

    my @usage_lines = (
        "Please use the following command syntax for vision types: (--enable_image_ocr | --enable_document_ocr) [--enable_image_labelling]\n",
        "\t\t --enable_image_ocr : optical character recognition for text within images\n",
        "\t\t --enable_document_ocr : optical character recognition for text within documents\n",
        "\t\t --enable_image_labelling : annotation labeling for objects within images\n",
    );
    foreach my $usage_line (@usage_lines) {
        print STDERR $usage_line;
    }

    exit(2);
}
106
# Inspect one line of output from the vision command.
#
# Parameter:
#   $line - a line of output text
#
# Returns the pair ($had_error, $generate_dot).  $had_error is always 0.
# $generate_dot is 1 whenever the line matches /^.*$/, i.e. for any text
# without an embedded newline (a single trailing newline is allowed).
sub vision_monitor_line {
    my ($line) = @_;

    # No line is currently classified as an error.
    my $had_error = 0;

    my $generate_dot = ($line =~ m/^.*$/) ? 1 : 0;

    return ($had_error, $generate_dot);
}
120
# Run the external vision.py script over one image file, once for every
# vision type enabled on this plugin, and hand each result to run_vision().
#
# Parameters:
#   $filename    - full path of the image file (passed to vision.py)
#   $file        - file name used in progress messages
#   $doc_obj     - document object that receives text/metadata
#   $opt_section - optional section; defaults to $doc_obj->get_top_section()
#
# Output files are written under $self->{'cached_dir'} (read after
# init_cache_for_file() has been called).  With exactly one vision type
# enabled the cache file is "google-vision-output.json"; with more than
# one, each file is prefixed with its vision type so they cannot clash.
#
# Always returns the string "json", even when no vision type is enabled.
sub run_gv_convert {
    my $self = shift (@_);
    my ($filename,$file,$doc_obj,$opt_section) = @_;

    my $section = (defined $opt_section) ? $opt_section : $doc_obj->get_top_section();

    my $outhandle = $self->{'outhandle'};
    print {$outhandle} "----- GoogleVisionAPIConverter run_gv_convert -----\n";

    # Enabled vision types, in a fixed order: labelling, image OCR, document OCR.
    my @vision_types = grep { $self->{$_} }
        qw(enable_image_labelling enable_image_ocr enable_document_ocr);

    if (@vision_types) {
        $self->init_cache_for_file($filename);
        my $cached_image_dir = $self->{'cached_dir'};

        my $ofile = "google-vision-output.json";
        my $credentials_filename = &FileUtils::filenameConcatenate(
            $ENV{'GSDLCOLLECTDIR'}, "etc", $self->{'google_application_credentials'});

        my $single_type = (scalar(@vision_types) == 1);

        foreach my $vision_type (@vision_types) {
            # Keep the historical cache file names: unprefixed for a single
            # type, "<type>-" prefixed when several types run on one image.
            my $cache_leaf = $single_type ? $ofile : $vision_type . "-" . $ofile;
            my $ofilename = &FileUtils::filenameConcatenate($cached_image_dir, $cache_leaf);

            # NOTE(review): the paths are interpolated into a shell command
            # string inside double quotes; a path containing a double quote,
            # '$' or a backtick would not be passed through safely.
            my $vision_cmd = "vision.py --$vision_type --credentials \"$credentials_filename\" \"$filename\" \"$ofilename\"";

            $self->run_vision($file, $filename, $ofile, $ofilename, $vision_cmd, $vision_type, $section, $doc_obj);
        }
    }

    return "json";
}
176
# Run one vision.py command (through run_cached_general_cmd), attach the
# resulting JSON file to the document, and turn the JSON into text and
# metadata on the given section.
#
# Parameters:
#   $file        - file name used in the progress message
#   $filename    - full path of the input image
#   $ofile       - base name for the associated JSON file
#   $ofilename   - full path of the JSON file the command writes
#   $vision_cmd  - complete command line to execute
#   $vision_type - "enable_image_ocr", "enable_document_ocr" or
#                  "enable_image_labelling"
#   $section     - document section that receives text/metadata
#   $doc_obj     - document object
#
# Dies if the JSON output file cannot be opened.  Any other $vision_type
# only gets the JSON file associated; no text or metadata is added.
sub run_vision {
    # NOTE(review): Data::Dumper is not used in this sub; the 'use' is kept
    # because it takes effect for the whole package at compile time.
    use Data::Dumper;
    my $self = shift (@_);
    my ($file, $filename, $ofile, $ofilename, $vision_cmd, $vision_type, $section, $doc_obj) = @_;

    my $print_info = { 'message_prefix' => "GoogleVisionAPI",
                       'message' => "Sending $file to GoogleVisionAPI using vision.py" };

    # NOTE(review): the three return values are not acted upon here; a
    # failed command surfaces below when the output file cannot be opened
    # or parsed.  Confirm their meaning against BaseMediaConverter before
    # adding explicit error handling.
    my ($vision_regenerated, $vision_result, $vision_error)
        = $self->run_cached_general_cmd($vision_cmd,$filename,$ofilename,$print_info);

    # Build the name of the associated JSON file:
    #  => strip off 'enable_' in favour of 'gv_'
    #  => add in section number as part of the file name to avoid clashes
    my $gv_assoc_prefix = $vision_type;
    $gv_assoc_prefix =~ s/^enable_/gv_/;

    my $section_file_suffix = $section;
    $section_file_suffix =~ s/\./_/g;

    # The section suffix is spliced in just before the first '.' of the name.
    my $assoc_ofile = $gv_assoc_prefix.$ofile;
    $assoc_ofile =~ s/\.(.*?)$/$section_file_suffix.$1/;

    $doc_obj->associate_file($ofilename,$assoc_ofile,"application/json",$section);

    # Slurp the JSON file as decoded characters.
    my $json_text = do {
        open(my $json_fh, "<:encoding(UTF-8)", $ofilename)
            or die("Can't open \"$ofilename\": $!\n");
        local $/;
        <$json_fh>
    };

    my $decoded_json = from_json($json_text);

    # Metadata-name stems for the two OCR flavours.
    my %ocr_meta_stem = (
        'enable_document_ocr' => "DocumentOCRJSON",
        'enable_image_ocr'    => "ImageOCRJSON",
    );

    if (exists $ocr_meta_stem{$vision_type}) {
        # The first text annotation carries the full OCR content.  It is
        # absent when no text was detected, so only append when defined.
        my $ocr_text = $decoded_json->{textAnnotations}[0]{description};
        if (defined $ocr_text) {
            $doc_obj->add_utf8_text($section, $ocr_text);
        }

        # Group the recognised words by detected language.  Only the first
        # page of the full-text annotation is examined.
        my $blocks = $decoded_json->{fullTextAnnotation}{pages}[0]{blocks};
        my %text_for_language;

        foreach my $block (@{ $blocks }) {
            foreach my $paragraph (@{ $block->{paragraphs} }) {
                foreach my $word (@{ $paragraph->{words} }) {
                    my $detected_language = $word->{property}{detectedLanguages}[0]{languageCode} || "no_lang";
                    my $word_text = "";
                    foreach my $symbol (@{ $word->{symbols} }) {
                        $word_text .= $symbol->{text};
                    }
                    $text_for_language{$detected_language} .= $word_text . " ";
                }
            }
        }

        # Sorted so the metadata is emitted in the same order on every run.
        foreach my $language (sort keys %text_for_language) {
            $doc_obj->add_utf8_metadata ($section, "z_" . $language, $text_for_language{$language});
        }

        my $meta_stem = $ocr_meta_stem{$vision_type};
        $doc_obj->add_utf8_metadata ($section, "GV" . $meta_stem, $assoc_ofile);
        $doc_obj->add_utf8_metadata ($section, "HasGoogleVision" . $meta_stem, 1);
    }
    elsif ($vision_type eq "enable_image_labelling") {
        my $labels = $decoded_json->{labelAnnotations};
        foreach my $label (@{ $labels }) {
            # write to metadata : 'description'='Book' 'score'='0.9' 'topicality'='0.9' 'mid'='/m/0123'
            $doc_obj->add_utf8_metadata ($section, "description", $label->{description});
            $doc_obj->add_utf8_metadata ($section, "score", $label->{score});
            $doc_obj->add_utf8_metadata ($section, "topicality", $label->{topicality});
            $doc_obj->add_utf8_metadata ($section, "mid", $label->{mid});

            # write to metadata : 'descriptions'='Book' 'Book_score'='0.9' 'Book_topicality'='0.9' 'Book_mid'='/m/0123'
            $doc_obj->add_utf8_metadata ($section, "descriptions", $label->{description});
            $doc_obj->add_utf8_metadata ($section, $label->{description} . "_score", $label->{score});
            $doc_obj->add_utf8_metadata ($section, $label->{description} . "_topicality", $label->{topicality});
            $doc_obj->add_utf8_metadata ($section, $label->{description} . "_mid", $label->{mid});
        }

        $doc_obj->add_utf8_metadata ($section, "HasGoogleVisionImageLabellingJSON", 1);
        $doc_obj->add_utf8_metadata ($section, "GVImageLabellingJSON",$assoc_ofile);
    }
}
279
2801;
281
282
283
284
285
286
287
288
289
290
291
Note: See TracBrowser for help on using the repository browser.