source: gs3-extensions/structured-image/trunk/perllib/plugins/GoogleVisionImagePlugin.pm@ 36247

Last change on this file since 36247 was 36247, checked in by davidb, 23 months ago

initial cut at files to provide google vision api processing of images using the GoogleVisionImagePlugin

File size: 10.2 KB
Line 
1######################################################################
2#
3# GoogleVisionImagePlugin.pm -- plugin that extends the capability of
4# ImagePlugin to use Google Vision API allowing for: metadata labelling
5# of objects within a scene; OCR text recognition.
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28package GoogleVisionImagePlugin;
29
30use strict;
31no strict 'refs'; # allow filehandles to be variables and viceversa
32no strict 'subs';
33
34use gsprintf;
35use FileUtils;
36
37use ImagePlugin;
38use BaseMediaConverter;
39
40use utf8;
41use JSON qw( from_json );
42
# Set up inheritance at compile time.  ImagePlugin is listed first, so
# Perl's default method resolution consults it before BaseMediaConverter.
BEGIN {
    @GoogleVisionImagePlugin::ISA = qw(ImagePlugin BaseMediaConverter);
}
46
# Plugin-specific command-line flags.  Each 'desc' value is a placeholder
# key written in braces rather than literal help text.
my $arguments = [
    {
        'name' => 'enable_image_labelling',
        'desc' => '{GoogleVisionImagePlugin.enable_image_labelling}',
        'type' => 'flag',
        'reqd' => 'no',
    },
    {
        'name' => 'enable_image_ocr',
        'desc' => '{GoogleVisionImagePlugin.enable_image_ocr}',
        'type' => 'flag',
        'reqd' => 'no',
    },
    {
        'name' => 'enable_document_ocr',
        'desc' => '{GoogleVisionImagePlugin.enable_document_ocr}',
        'type' => 'flag',
        'reqd' => 'no',
    },
];

# Option record for this plugin; 'args' points at the flag list above.
my $options = {
    'name'     => 'GoogleVisionImagePlugin',
    'desc'     => '{GoogleVisionImagePlugin.desc}',
    'abstract' => 'no',
    'inherits' => 'yes',
    'args'     => $arguments,
};
67
# Constructor.
#
# Parameters:
#   $class           - class name the new object is blessed into
#   $pluginlist      - array ref; $class is appended to it
#   $inputargs       - passed through unchanged to both parent constructors
#   $hashArgOptLists - hash ref; this plugin's flags are appended to its
#                      "ArgList" entry and its option record to "OptList"
#
# Returns the object built by ImagePlugin's constructor, re-blessed into
# $class.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
    push(@{$hashArgOptLists->{"OptList"}},$options);

    # Arrow method calls replace the indirect-object form ("new Class(...)"),
    # whose parsing depends on which names happen to be known at compile time.
    #
    # The BaseMediaConverter constructor is invoked only for its side effects;
    # its return value is deliberately discarded.
    # NOTE(review): the trailing 1 presumably marks this as an auxiliary
    # (non-primary) parent -- confirm against BaseMediaConverter::new.
    BaseMediaConverter->new($pluginlist, $inputargs, $hashArgOptLists, 1);
    my $self = ImagePlugin->new($pluginlist, $inputargs, $hashArgOptLists);

    return bless $self, $class;
}
81
# Validate the OCR flags, then hand over to the parent class's begin().
#
# The two OCR flags are mutually exclusive: when both are set, a usage
# summary is written to STDERR and the process exits with status 2.
# Otherwise the original arguments are forwarded to SUPER::begin and its
# result is returned.
sub begin {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;

    # Normal path: at most one OCR mode requested.
    if (!$self->{'enable_image_ocr'} || !$self->{'enable_document_ocr'}) {
        return $self->SUPER::begin(@_);
    }

    my @usage_lines = (
        "Please use the following command syntax for vision types: (--enable_image_ocr | --enable_document_ocr) [--enable_image_labelling]\n",
        "\t\t --enable_image_ocr : optical character recognition for text within images\n",
        "\t\t --enable_document_ocr : optical character recognition for text within documents\n",
        "\t\t --enable_image_labelling : annotation labeling for objects within images\n",
    );

    # One print per line, exactly as many writes as there are usage lines.
    foreach my $usage_line (@usage_lines) {
        print STDERR $usage_line;
    }
    exit(2);
}
96
# Classify one line of output.
#
# Parameters:
#   $line - the text to inspect
#
# Returns the two-element list ($had_error, $generate_dot).  $had_error is
# always 0.  $generate_dot is 1 when the pattern below matches, i.e. for
# any text that contains no newline other than an optional final one, and
# 0 otherwise.
sub vision_monitor_line {
    my ($line) = @_;

    my $had_error    = 0;
    my $generate_dot = ($line =~ m/^.*$/) ? 1 : 0;

    return ($had_error, $generate_dot);
}
110
# Run vision.py once for every vision type enabled on this plugin.
#
# Parameters:
#   $base_dir - accepted for interface compatibility; not used here
#   $filename - full path of the image; passed to vision.py and used to
#               initialise the per-file cache
#   $file     - file name used in the progress message printed by run_vision
#   $doc_obj  - document object that receives the results
#
# Returns the string "json" in every case, including when no vision type
# is enabled (in which case nothing else happens).
#
# Output naming: with exactly one enabled type the result is written to
# "google-vision-output.json" in the cache directory; with more than one,
# each result goes to "<type>-google-vision-output.json".
sub run_convert {
    my $self = shift (@_);
    my ($base_dir,$filename,$file,$doc_obj) = @_;

    my $section = $doc_obj->get_top_section();

    my $outhandle = $self->{'outhandle'};
    print $outhandle "----- GoogleVisionImagePlugin run_convert -----\n";

    # Enabled vision types, in a fixed order: labelling first, then OCR.
    my @vision_types = grep { $self->{$_} }
        qw(enable_image_labelling enable_image_ocr enable_document_ocr);

    if (@vision_types) {
        $self->init_cache_for_file($filename);
        my $cached_image_dir = $self->{'cached_dir'};

        my $ofile = "google-vision-output.json";
        my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
        # NOTE(review): the credentials file name is hard-coded; consider
        # turning it into a plugin option.
        my $credentials_filename = &FileUtils::filenameConcatenate($collect_dir, "etc", "atea-storage-cd63a39dfeb5.json");

        my $single_type = (scalar(@vision_types) == 1);

        foreach my $vision_type (@vision_types) {
            # Keep the unprefixed name when only one type is requested, so
            # that existing cached output is still found.
            my $ofile_tail = $single_type ? $ofile : $vision_type . "-" . $ofile;
            my $ofilename = &FileUtils::filenameConcatenate($cached_image_dir, $ofile_tail);

            # NOTE(review): the paths are only wrapped in double quotes.  If
            # this string is executed through a shell, a path containing a
            # double quote, backquote or dollar sign would break the command
            # or be interpreted by the shell -- confirm how
            # run_cached_general_cmd executes it.
            my $vision_cmd = "vision.py --$vision_type --credentials \"$credentials_filename\" \"$filename\" \"$ofilename\"";

            $self->run_vision($file, $filename, $ofile, $ofilename, $vision_cmd, $vision_type, $section, $doc_obj);
        }
    }

    return "json";
}
165
# Run one vision.py command (through the conversion cache) and copy its
# JSON result into the document.
#
# Parameters:
#   $file        - file name shown in the progress message
#   $filename    - full path of the source image
#   $ofile       - base name for the associated file; the vision type is
#                  prepended to it with no separator
#   $ofilename   - full path of the JSON file vision.py writes
#   $vision_cmd  - complete command line to execute
#   $vision_type - "enable_image_ocr", "enable_document_ocr" or
#                  "enable_image_labelling"; any other value only
#                  associates the JSON file
#   $section     - section the JSON file is associated with
#   $doc_obj     - document object to update
#
# Effects on $doc_obj (all text/metadata goes to the top section):
#   * OCR types: appends textAnnotations[0].description as text, and adds
#     one "z_<languageCode>" metadata value per detected language holding
#     that language's words separated by spaces ("z_no_lang" when a word
#     carries no language code).
#   * Labelling: for every label adds "description", "score", "topicality",
#     "mid", "descriptions" and "<description>_score" / "_topicality" /
#     "_mid" metadata.
#
# Dies if the JSON output cannot be opened or parsed.  Returns nothing.
sub run_vision {
    # Debugging aid; nothing below uses it.
    use Data::Dumper;
    my $self = shift (@_);
    my ($file, $filename, $ofile, $ofilename, $vision_cmd, $vision_type, $section, $doc_obj) = @_;

    my $print_info = { 'message_prefix' => "GoogleVisionAPI",
                       'message' => "Sending $file to GoogleVisionAPI using vision.py" };

    my ($vision_regenerated,$vision_result,$vision_error)
        = $self->run_cached_general_cmd($vision_cmd,$filename,$ofilename,$print_info);

    # NOTE(review): presumably true when the command failed -- confirm
    # against run_cached_general_cmd.  Processing continues so that output
    # from an earlier successful run can still be used.
    if ($vision_error) {
        print STDERR "GoogleVisionImagePlugin: vision.py reported an error while processing $file\n";
    }

    # NOTE(review): the associated name has no separator between type and
    # base name (e.g. "enable_image_ocrgoogle-vision-output.json"); left
    # unchanged because other code may refer to it.
    $doc_obj->associate_file($ofilename,$vision_type . $ofile,"application/json",$section);

    my $json_text = do { # read in json file
        open(my $json_fh, "<:encoding(UTF-8)", $ofilename)
            or die("Can't open \"$ofilename\": $!\n");
        local $/;
        <$json_fh>
    };

    my $cursection = $doc_obj->get_top_section(); # get top section for text append

    # Report which file was malformed instead of a bare parser error.
    my $decoded_json = eval { from_json($json_text) };
    if ($@) {
        die("Can't parse \"$ofilename\" as JSON: $@");
    }
    if (ref($decoded_json) ne 'HASH') {
        print STDERR "GoogleVisionImagePlugin: unexpected JSON structure in \"$ofilename\", nothing extracted\n";
        return;
    }

    if ($vision_type eq "enable_document_ocr" || $vision_type eq "enable_image_ocr") {
        # The first text annotation holds the full OCR content.  It is
        # absent when no text was found, so only append defined text.
        my $ocr_text = $decoded_json->{textAnnotations}[0]{description};
        if (defined $ocr_text) {
            $doc_obj->add_utf8_text($cursection, $ocr_text);
        }

        # Group the recognised words by detected language, over every page
        # of the response rather than the first page only.
        my %text_and_language;
        foreach my $page (@{ $decoded_json->{fullTextAnnotation}{pages} || [] }) {
            foreach my $block (@{ $page->{blocks} || [] }) {
                foreach my $paragraph (@{ $block->{paragraphs} || [] }) {
                    foreach my $word (@{ $paragraph->{words} || [] }) {
                        my $detected_language = $word->{property}{detectedLanguages}[0]{languageCode} || "no_lang";
                        my $word_text = join("",
                            map { defined $_->{text} ? $_->{text} : "" } @{ $word->{symbols} || [] });
                        $text_and_language{$detected_language} .= $word_text . " ";
                    }
                }
            }
        }

        # Sorted so that metadata is added in the same order on every run.
        foreach my $language (sort keys %text_and_language) {
            $doc_obj->add_utf8_metadata($cursection, "z_" . $language, $text_and_language{$language});
        }
    }
    elsif ($vision_type eq "enable_image_labelling") {
        foreach my $label (@{ $decoded_json->{labelAnnotations} || [] }) {
            my $description = $label->{description};
            # A label without a description would yield metadata named
            # "_score", "_topicality" and "_mid"; skip it.
            next unless defined $description;

            # write to metadata : 'description'='Book' 'score'='0.9' 'topicality'='0.9' 'mid'='/m/0123'
            $doc_obj->add_utf8_metadata($cursection, "description", $description);
            $doc_obj->add_utf8_metadata($cursection, "score", $label->{score});
            $doc_obj->add_utf8_metadata($cursection, "topicality", $label->{topicality});
            $doc_obj->add_utf8_metadata($cursection, "mid", $label->{mid});
            # write to metadata : 'descriptions'='Book' 'Book_score'='0.9' 'Book_topicality'='0.9' 'Book_mid'='/m/0123'
            $doc_obj->add_utf8_metadata($cursection, "descriptions", $description);
            $doc_obj->add_utf8_metadata($cursection, $description . "_score", $label->{score});
            $doc_obj->add_utf8_metadata($cursection, $description . "_topicality", $label->{topicality});
            $doc_obj->add_utf8_metadata($cursection, $description . "_mid", $label->{mid});
        }
    }

    return;
}
235
# Plugin-specific processing of doc_obj.
#
# Resolves the full path of $file under $base_dir, derives a renamed form
# of its leaf name using the plugin's 'file_rename_method', runs the vision
# conversion for the file, and then forwards the original arguments to the
# parent class's process(), returning that call's result.
sub process {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;

    my ($full_path, $leaf_name) = &util::get_full_filenames($base_dir, $file);

    my $utf8_leaf_name = $self->filepath_to_utf8($leaf_name);
    my $renamed_leaf   = &util::rename_file($utf8_leaf_name, $self->{'file_rename_method'});

    # Vision results are added to the document before the parent runs.
    $self->run_convert($base_dir, $full_path, $renamed_leaf, $doc_obj);

    return $self->SUPER::process(@_);
}
250
2511;
252
253
254
255
256
257
258
259
260
261
262
Note: See TracBrowser for help on using the repository browser.