source: gs3-extensions/structured-image/trunk/perllib/plugins/GoogleVisionImagePlugin.pm@ 36247

Last change on this file since 36247 was 36247, checked in by davidb, 2 months ago

Initial cut at files to provide Google Vision API processing of images using the GoogleVisionImagePlugin

File size: 10.2 KB
Line 
1######################################################################
2#
3# GoogleVisionImagePlugin.pm -- plugin that extends the capability of
4# ImagePlugin to use Google Vision API allowing for: metadata labelling
5# of objects within a scene; OCR text recognition.
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28package GoogleVisionImagePlugin;
29
30use strict;
31no strict 'refs'; # allow filehandles to be variables and viceversa
32no strict 'subs';
33
34use gsprintf;
35use FileUtils;
36
37use ImagePlugin;
38use BaseMediaConverter;
39
40use utf8;
41use JSON qw( from_json );
42
# Set up inheritance at compile time: method lookup tries ImagePlugin first,
# then BaseMediaConverter.
BEGIN {
    @GoogleVisionImagePlugin::ISA = qw(ImagePlugin BaseMediaConverter);
}
46
# Option flags contributed by this plugin.  Each 'desc' value is a
# brace-wrapped key rather than literal help text (presumably looked up in a
# string resource elsewhere -- confirm against the parent plugin classes).
my $arguments = [
    {
        'name' => 'enable_image_labelling',
        'desc' => '{GoogleVisionImagePlugin.enable_image_labelling}',
        'type' => 'flag',
        'reqd' => 'no',
    },
    {
        'name' => 'enable_image_ocr',
        'desc' => '{GoogleVisionImagePlugin.enable_image_ocr}',
        'type' => 'flag',
        'reqd' => 'no',
    },
    {
        'name' => 'enable_document_ocr',
        'desc' => '{GoogleVisionImagePlugin.enable_document_ocr}',
        'type' => 'flag',
        'reqd' => 'no',
    },
];

# Plugin description record; new() appends it to the shared "OptList".
my $options = {
    'name'     => 'GoogleVisionImagePlugin',
    'desc'     => '{GoogleVisionImagePlugin.desc}',
    'abstract' => 'no',
    'inherits' => 'yes',
    'args'     => $arguments,
};
67
# Constructor.
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - passed through unchanged to the parent constructors
#   $hashArgOptLists - hash ref; this plugin's arguments are appended to its
#                      "ArgList" entry and its option record to "OptList"
# Returns the object built by ImagePlugin, re-blessed into $class.
sub new {
    my $class = shift(@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;

    push(@{$pluginlist}, $class);
    push(@{ $hashArgOptLists->{"ArgList"} }, @{$arguments});
    push(@{ $hashArgOptLists->{"OptList"} }, $options);

    # The BaseMediaConverter constructor is invoked only for its effect on the
    # shared argument structures; the object it returns is discarded.
    # NOTE(review): the meaning of the trailing 1 is defined by
    # BaseMediaConverter and is not visible in this file -- confirm there.
    BaseMediaConverter->new($pluginlist, $inputargs, $hashArgOptLists, 1);

    my $self = ImagePlugin->new($pluginlist, $inputargs, $hashArgOptLists);
    bless($self, $class);

    return $self;
}
81
# Validate the vision flags, then hand over to the parent class's begin().
# --enable_image_ocr and --enable_document_ocr are mutually exclusive: when
# both are set, usage help is written to STDERR and the process exits with
# status 2.
sub begin {
    my $self = shift(@_);
    my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;

    my $both_ocr_modes = $self->{'enable_image_ocr'} && $self->{'enable_document_ocr'};

    if ($both_ocr_modes) {
        my $usage =
              "Please use the following command syntax for vision types: (--enable_image_ocr | --enable_document_ocr) [--enable_image_labelling]\n"
            . "\t\t --enable_image_ocr : optical character recognition for text within images\n"
            . "\t\t --enable_document_ocr : optical character recognition for text within documents\n"
            . "\t\t --enable_image_labelling : annotation labeling for objects within images\n";
        print STDERR $usage;
        exit(2);
    }

    # @_ still holds the original arguments (minus $self), so the parent
    # receives exactly what this method was given.
    return $self->SUPER::begin(@_);
}
96
# Classify one line of output from the vision command.
# Returns the list ($had_error, $generate_dot).  $had_error is always 0 (no
# error detection is done here); $generate_dot is 1 whenever the line matches
# the pattern below, which accepts any text that has no newline other than an
# optional trailing one.
# Not called from within this file.
sub vision_monitor_line {
    my ($line) = @_;

    my $had_error    = 0;
    my $generate_dot = ($line =~ m/^.*$/) ? 1 : 0;

    return ($had_error, $generate_dot);
}
110
# Run the Google Vision processing selected by the plugin flags for one image.
#   $base_dir - unused here
#   $filename - path of the image; passed to vision.py and to the cache setup
#   $file     - name used in progress messages (passed on to run_vision)
#   $doc_obj  - document object that receives the results
# Always returns the string "json".
sub run_convert {
    my $self = shift(@_);
    my ($base_dir, $filename, $file, $doc_obj) = @_;

    my $section   = $doc_obj->get_top_section();
    my $outhandle = $self->{'outhandle'};
    print $outhandle "----- GoogleVisionImagePlugin run_convert -----\n";

    # Active vision types, in a fixed order: labelling, image OCR, document OCR.
    my @vision_type = grep { $self->{$_} }
        qw(enable_image_labelling enable_image_ocr enable_document_ocr);
    my $vision_type_length = scalar(@vision_type);

    return "json" if ($vision_type_length == 0);

    $self->init_cache_for_file($filename);
    my $cached_image_dir = $self->{'cached_dir'};

    my $ofile = "google-vision-output.json";

    # NOTE(review): the credentials file name is hard-coded; it is looked up
    # in the "etc" directory under $ENV{'GSDLCOLLECTDIR'}.
    my $credentials_filename
        = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, "etc", "atea-storage-cd63a39dfeb5.json");

    # begin() rejects the two OCR flags together, so one or two types are
    # expected.  Any other count runs nothing.
    if ($vision_type_length == 1 || $vision_type_length == 2) {
        # With two types each gets its own "<type>-" prefixed cache file;
        # a single type uses the plain file name.
        my $prefix_with_type = ($vision_type_length == 2);

        foreach my $type (@vision_type) {
            my $json_name = $prefix_with_type
                ? $type . "-google-vision-output.json"
                : "google-vision-output.json";
            my $ofilename = &FileUtils::filenameConcatenate($cached_image_dir, $json_name);

            # NOTE(review): paths are interpolated into the command between
            # double quotes with no further escaping.
            my $vision_cmd = "vision.py --$type --credentials \"$credentials_filename\" \"$filename\" \"$ofilename\"";

            $self->run_vision($file, $filename, $ofile, $ofilename, $vision_cmd, $type, $section, $doc_obj);
        }
    }

    return "json";
}
165
# Run one Google Vision request (through the cached-command helper) and copy
# its JSON output into the document.
#   $file        - name used in the progress message
#   $filename    - input image path given to the cached-command helper
#   $ofile       - base name for the associated file; the name actually
#                  recorded is $vision_type . $ofile
#   $ofilename   - path of the JSON file the command writes
#   $vision_cmd  - complete command line to execute
#   $vision_type - "enable_image_ocr", "enable_document_ocr" or
#                  "enable_image_labelling"
#   $section     - section the JSON file is associated with
#   $doc_obj     - document object receiving text and metadata
# If the JSON output is missing, unreadable or not a JSON object, a message is
# written to the plugin's output handle and the document is left untouched, so
# one failed request does not abort the whole import.
sub run_vision {
    use Data::Dumper; # kept from the original; not used in this sub
    my $self = shift (@_);
    my ($file, $filename, $ofile, $ofilename, $vision_cmd, $vision_type, $section, $doc_obj) = @_;

    my $outhandle = $self->{'outhandle'} || \*STDERR;

    my $print_info = { 'message_prefix' => "GoogleVisionAPI",
                       'message' => "Sending $file to GoogleVisionAPI using vision.py" };

    # NOTE(review): the three return values are not interpreted here because
    # their exact meaning is defined by BaseMediaConverter; success is judged
    # by the presence and validity of the output file instead.
    my ($vision_regenerated, $vision_result, $vision_error)
        = $self->run_cached_general_cmd($vision_cmd, $filename, $ofilename, $print_info);

    if (!-f $ofilename) {
        print $outhandle "GoogleVisionImagePlugin: no output file \"$ofilename\" was produced for $file; skipping $vision_type\n";
        return;
    }

    my $json_fh;
    if (!open($json_fh, "<:encoding(UTF-8)", $ofilename)) {
        print $outhandle "GoogleVisionImagePlugin: can't open \"$ofilename\": $!; skipping $vision_type\n";
        return;
    }
    my $json_text = do { local $/; <$json_fh> }; # slurp the whole file
    close($json_fh);

    my $decoded_json = eval { from_json($json_text) };
    my $json_error = $@;
    if (ref($decoded_json) ne 'HASH') {
        $json_error = "top-level value is not a JSON object\n" if (!$json_error);
        print $outhandle "GoogleVisionImagePlugin: unusable JSON in \"$ofilename\": $json_error";
        return;
    }

    # Only associate the file once it is known to hold a usable response.
    $doc_obj->associate_file($ofilename, $vision_type . $ofile, "application/json", $section);

    my $cursection = $doc_obj->get_top_section(); # text/metadata go on the top section

    if ($vision_type eq "enable_document_ocr" || $vision_type eq "enable_image_ocr") {
        _add_vision_ocr_metadata($doc_obj, $cursection, $decoded_json);
    }
    elsif ($vision_type eq "enable_image_labelling") {
        _add_vision_label_metadata($doc_obj, $cursection, $decoded_json);
    }

    return;
}

# Append the recognised text to the section and add one "z_<language>"
# metadata value per detected language holding that language's words.
sub _add_vision_ocr_metadata {
    my ($doc_obj, $cursection, $decoded_json) = @_;

    # The first text annotation carries the full OCR content; it is absent
    # when no text was recognised.
    my $annotations = $decoded_json->{textAnnotations};
    my $ocr_text;
    if (ref($annotations) eq 'ARRAY' && scalar(@{$annotations}) > 0) {
        $ocr_text = $annotations->[0]{description};
    }
    $doc_obj->add_utf8_text($cursection, $ocr_text) if (defined $ocr_text);

    my $full_text = $decoded_json->{fullTextAnnotation} || {};
    my $pages     = $full_text->{pages} || [];
    my $blocks    = (scalar(@{$pages}) > 0 ? $pages->[0]{blocks} : undef) || [];

    my %text_and_language;
    foreach my $block (@{$blocks}) {
        foreach my $paragraph (@{ $block->{paragraphs} || [] }) {
            foreach my $word (@{ $paragraph->{words} || [] }) {
                my $detected_language
                    = $word->{property}{detectedLanguages}[0]{languageCode} || "no_lang";
                # A word is stored as a list of symbols; glue them back together.
                my $word_text = join("",
                    map { defined $_->{text} ? $_->{text} : "" } @{ $word->{symbols} || [] });
                $text_and_language{$detected_language} .= $word_text . " ";
            }
        }
    }

    foreach my $language (sort keys %text_and_language) {
        $doc_obj->add_utf8_metadata($cursection, "z_" . $language, $text_and_language{$language});
    }

    return;
}

# Record every label annotation twice: once under fixed metadata names and
# once under names derived from the label's description.
sub _add_vision_label_metadata {
    my ($doc_obj, $cursection, $decoded_json) = @_;

    foreach my $label (@{ $decoded_json->{labelAnnotations} || [] }) {
        # write to metadata : 'description'='Book' 'score'='0.9' 'topicality'='0.9' 'mid'='/m/0123'
        $doc_obj->add_utf8_metadata($cursection, "description", $label->{description});
        $doc_obj->add_utf8_metadata($cursection, "score",       $label->{score});
        $doc_obj->add_utf8_metadata($cursection, "topicality",  $label->{topicality});
        $doc_obj->add_utf8_metadata($cursection, "mid",         $label->{mid});
        # write to metadata : 'descriptions'='Book' 'Book_score'='0.9' 'Book_topicality'='0.9' 'Book_mid'='/m/0123'
        $doc_obj->add_utf8_metadata($cursection, "descriptions", $label->{description});
        $doc_obj->add_utf8_metadata($cursection, $label->{description} . "_score",      $label->{score});
        $doc_obj->add_utf8_metadata($cursection, $label->{description} . "_topicality", $label->{topicality});
        $doc_obj->add_utf8_metadata($cursection, $label->{description} . "_mid",        $label->{mid});
    }

    return;
}
235
236# do plugin specific processing of doc_obj
sub process {
    my $self = shift(@_);
    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;

    # Resolve the image's full path, then derive the renamed form of its bare
    # file name, which run_convert uses in its progress messages.
    # NOTE(review): the util package is not loaded by this file; presumably a
    # parent plugin brings it in.
    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file);
    my $url_encoded_filename = &util::rename_file(
        $self->filepath_to_utf8($filename_no_path),
        $self->{'file_rename_method'}
    );

    # Vision processing happens before the parent class does its own work.
    $self->run_convert($base_dir, $filename_full_path, $url_encoded_filename, $doc_obj);

    # @_ still holds the original arguments (minus $self).
    return $self->SUPER::process(@_);
}
250
2511;
252
253
254
255
256
257
258
259
260
261
262
Note: See TracBrowser for help on using the repository browser.