source: gs3-extensions/structured-image/trunk/perllib/plugins/GoogleVisionAPIConverter.pm@ 37000

Last change on this file since 37000 was 37000, checked in by davidb, 15 months ago

Changes after testing

File size: 10.0 KB
Line 
1######################################################################
2#
3# GoogleVisionAPIConverter.pm -- helper plugin that allows other plugins
4# (such as ImagePlugin and PagedImagePlugin) to extend their
5# processing capability through sub-classing inheritance (such as
6# GoogleVisionImagePlugin and GoogleVisionPagedImagePlugin) to
7# expand the image processing capabilities at ingest time to
8# include the Google Vision API allowing for: metadata labelling
9# of objects within a scene; and OCR text recognition.
10#
11# A component of the Greenstone digital library software
12# from the New Zealand Digital Library Project at the
13# University of Waikato, New Zealand.
14#
15# Copyright (C) 1999 New Zealand Digital Library Project
16#
17# This program is free software; you can redistribute it and/or modify
18# it under the terms of the GNU General Public License as published by
19# the Free Software Foundation; either version 2 of the License, or
20# (at your option) any later version.
21#
22# This program is distributed in the hope that it will be useful,
23# but WITHOUT ANY WARRANTY; without even the implied warranty of
24# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25# GNU General Public License for more details.
26#
27# You should have received a copy of the GNU General Public License
28# along with this program; if not, write to the Free Software
29# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
30#
31###########################################################################
32
33package GoogleVisionAPIConverter;
34
35use strict;
36no strict 'refs'; # allow filehandles to be variables and viceversa
37no strict 'subs';
38
39use gsprintf;
40use FileUtils;
41
42##use ImagePlugin;
43use BaseMediaConverter;
44
45use utf8;
46use JSON qw( from_json );
47
# Set up the inheritance chain at compile time: this converter is a
# subclass of BaseMediaConverter.
BEGIN {
    @GoogleVisionAPIConverter::ISA = qw(BaseMediaConverter);
}
51
# Option table for this converter.  Each entry names one plugin option,
# the resource-bundle key of its description, its type, and whether it is
# required.
my $arguments = [
    {
        'name' => "google_application_credentials",
        # NOTE(review): this key is spelled "applicatio" (no trailing "n").
        # It is kept exactly as found; confirm against the strings bundle
        # before correcting it.
        'desc' => "{GoogleVisionAPIConverter.google_applicatio_credentials}",
        'type' => "string",
        'reqd' => "no",
        'deft' => "google-sa-credentials-key.json",
    },

    # The three on/off switches all have the same shape, so they are
    # generated from their names: an optional flag whose description key is
    # "{GoogleVisionAPIConverter.<name>}".
    (
        map {
            +{
                'name' => $_,
                'desc' => "{GoogleVisionAPIConverter." . $_ . "}",
                'type' => "flag",
                'reqd' => "no",
            }
        } qw(enable_image_labelling enable_image_ocr enable_document_ocr)
    ),
];

# Descriptor for the converter itself; 'args' points at the table above.
my $options = {
    'name'     => "GoogleVisionAPIConverter",
    'desc'     => "{GoogleVisionAPIConverter.desc}",
    'abstract' => "no",
    'inherits' => "yes",
    'args'     => $arguments,
};
78
# Constructor.
#
# Arguments (after the class name):
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - passed through untouched to the base constructor
#   $hashArgOptLists - hash ref; this converter's option table is appended
#                      to its "ArgList" entry and its descriptor to "OptList"
#
# Returns the object built by BaseMediaConverter, re-blessed into $class.
sub new {
    my $class = shift(@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;

    push(@{$pluginlist}, $class);

    push(@{ $hashArgOptLists->{"ArgList"} }, @{$arguments});
    push(@{ $hashArgOptLists->{"OptList"} }, $options);

    # Call the base constructor with explicit class-method syntax.  The
    # indirect form ("new BaseMediaConverter(...)") is parsed by heuristics
    # that depend on which subs and packages are known at compile time.
    # NOTE(review): the meaning of the trailing 1 is defined by
    # BaseMediaConverter and is not visible here; it is passed unchanged.
    my $self = BaseMediaConverter->new($pluginlist, $inputargs, $hashArgOptLists, 1);

    return bless($self, $class);
}
91
# Validate the vision-type flags, then hand over to the parent class.
#
# The two OCR modes cannot be combined: if both are set, a usage summary is
# written to STDERR and the process exits with status 2.  Otherwise the
# arguments received (minus $self) are forwarded unchanged to the
# superclass begin().
sub begin {
    my $self = shift(@_);

    my $both_ocr_modes = $self->{'enable_image_ocr'} && $self->{'enable_document_ocr'};

    if ($both_ocr_modes) {
        my @usage_lines = (
            "Please use the following command syntax for vision types: (--enable_image_ocr | --enable_document_ocr) [--enable_image_labelling]\n",
            "\t\t --enable_image_ocr : optical character recognition for text within images\n",
            "\t\t --enable_document_ocr : optical character recognition for text within documents\n",
            "\t\t --enable_image_labelling : annotation labeling for objects within images\n",
        );

        foreach my $usage_line (@usage_lines) {
            print STDERR $usage_line;
        }

        exit(2);
    }

    return $self->SUPER::begin(@_);
}
106
# Classify one line of text for a caller that monitors command output.
#
# Takes a single argument, the line.  Returns a two-element list
# ($had_error, $generate_dot):
#   - $had_error is always 0; no line is ever reported as an error.
#   - $generate_dot is 1 when the line matches /^.*$/, i.e. it is a single
#     line (optionally ending in one newline), and 0 when it contains a
#     newline anywhere before its end.
sub vision_monitor_line {
    my ($line) = @_;

    my $generate_dot = ($line =~ m/^.*$/) ? 1 : 0;

    return (0, $generate_dot);
}
120
# Run the enabled Google Vision analyses for one source file.
#
# Arguments:
#   $filename    - path of the file to analyse (passed to vision.py)
#   $file        - file name as used in progress messages
#   $doc_obj     - document object that receives the results
#   $opt_section - optional section; defaults to the document's top section
#
# One vision.py command is built per enabled type and handed to
# run_vision().  With a single type the JSON output goes to
# "google-vision-output.json" in the cache directory; with two types each
# output file name is prefixed with its type.
#
# NOTE(review): if all three types are enabled nothing is run.  begin()
# rejects the two OCR modes together, so at most two are expected.
#
# Always returns the string "json".
sub run_gv_convert {
    my $self = shift(@_);
    my ($filename, $file, $doc_obj, $opt_section) = @_;

    my $section = $opt_section;
    $section = $doc_obj->get_top_section() unless (defined $section);

    my $outhandle = $self->{'outhandle'};
    print $outhandle "----- GoogleVisionAPIConveter run_gv_convert -----\n";

    # Enabled types, always in this fixed order.
    my @enabled_types = grep { $self->{$_} }
        ("enable_image_labelling", "enable_image_ocr", "enable_document_ocr");
    my $num_types = scalar(@enabled_types);

    if ($num_types > 0) {
        $self->init_cache_for_file($filename);
        my $cache_dir = $self->{'cached_dir'};

        # Suffix of the associated-file name; run_vision() prepends the type.
        my $assoc_suffix = "google-vision-output.json";

        # The credentials file is looked up in the collection's etc directory.
        my $credentials_path = &FileUtils::filenameConcatenate(
            $ENV{'GSDLCOLLECTDIR'}, "etc", $self->{'google_application_credentials'});

        # Each job is a [ type, output path ] pair.
        my @jobs = ();
        if ($num_types == 1) {
            my $only_output = &FileUtils::filenameConcatenate($cache_dir, "google-vision-output.json");
            push(@jobs, [ $enabled_types[0], $only_output ]);
        }
        elsif ($num_types == 2) {
            foreach my $type (@enabled_types) {
                my $typed_output = &FileUtils::filenameConcatenate(
                    $cache_dir, $type . "-google-vision-output.json");
                push(@jobs, [ $type, $typed_output ]);
            }
        }

        foreach my $job (@jobs) {
            my ($type, $output_path) = @{$job};
            my $command = "vision.py --$type --credentials \"$credentials_path\" \"$filename\" \"$output_path\"";

            $self->run_vision($file, $filename, $assoc_suffix, $output_path, $command, $type, $section, $doc_obj);
        }
    }

    return "json";
}
175
# Run one vision.py command (through the converter's command cache) and
# fold its JSON output into the document.
#
# Arguments:
#   $file        - file name used in the progress message
#   $filename    - path of the source file
#   $ofile       - suffix of the associated-file name ($vision_type is
#                  prepended to it)
#   $ofilename   - path of the JSON file that vision.py writes
#   $vision_cmd  - complete command line to run
#   $vision_type - "enable_image_ocr", "enable_document_ocr" or
#                  "enable_image_labelling"
#   $section     - document section that receives text and metadata
#   $doc_obj     - document object
#
# Effects:
#   - The JSON file is associated with the document as "application/json".
#   - OCR types: the full recognised text (textAnnotations[0].description)
#     is appended to the section, and the words of the first page are
#     grouped by detected language into "z_<languageCode>" metadata
#     ("z_no_lang" when no language is reported).
#   - Labelling: every label is written both as flat metadata
#     (description/score/topicality/mid) and as per-label metadata
#     ("descriptions", "<description>_score", ...).
#
# Dies if the JSON output file cannot be opened.  No meaningful return value.
sub run_vision {
    # Not used in this sub.  Kept because "use" imports Dumper into the
    # whole package and other code may rely on that.
    use Data::Dumper;

    my $self = shift(@_);
    my ($file, $filename, $ofile, $ofilename, $vision_cmd, $vision_type, $section, $doc_obj) = @_;

    my $print_info = { 'message_prefix' => "GoogleVisionAPI",
                       'message' => "Sending $file to GoogleVisionAPI using vision.py" };

    # NOTE(review): the three results are not inspected.  If the command
    # produced no output file, that shows up as the die on open below.
    my ($vision_regenerated, $vision_result, $vision_error)
        = $self->run_cached_general_cmd($vision_cmd, $filename, $ofilename, $print_info);

    $doc_obj->associate_file($ofilename, $vision_type . $ofile, "application/json", $section);

    # Slurp the whole JSON file as decoded characters.
    my $json_text = do {
        open(my $json_fh, "<:encoding(UTF-8)", $ofilename)
            or die("Can't open \"$ofilename\": $!\n");
        local $/;
        my $content = <$json_fh>;
        close($json_fh);
        $content;
    };

    my $decoded_json = from_json($json_text);

    if ($vision_type eq "enable_document_ocr" || $vision_type eq "enable_image_ocr") {
        # The first text annotation carries the complete recognised text.
        # It may be missing when the response holds no text annotations, so
        # check before dereferencing instead of passing undef on.
        my $text_annotations = $decoded_json->{textAnnotations};
        my $ocr_text;
        if (ref($text_annotations) eq 'ARRAY' && scalar(@{$text_annotations}) > 0) {
            $ocr_text = $text_annotations->[0]{description};
        }
        if (defined $ocr_text) {
            $doc_obj->add_utf8_text($section, $ocr_text); # append text to section
        }

        # Walk down to the blocks of the first page without creating
        # missing levels in the decoded data.
        my $full_text = $decoded_json->{fullTextAnnotation};
        my $pages = (ref($full_text) eq 'HASH') ? $full_text->{pages} : undef;
        my $first_page = (ref($pages) eq 'ARRAY') ? $pages->[0] : undef;
        my $blocks = (ref($first_page) eq 'HASH') ? $first_page->{blocks} : undef;

        # language code => space-terminated words detected in that language
        my %text_and_language;

        foreach my $block (@{ $blocks || [] }) {
            foreach my $paragraph (@{ $block->{paragraphs} || [] }) {
                foreach my $word (@{ $paragraph->{words} || [] }) {
                    my $detected_language
                        = $word->{property}{detectedLanguages}[0]{languageCode} || "no_lang";

                    # A word is stored as a list of symbols; glue them back together.
                    my $word_text = join("",
                        map { defined $_->{text} ? $_->{text} : "" } @{ $word->{symbols} || [] });

                    $text_and_language{$detected_language} .= $word_text . " ";
                }
            }
        }

        # Sorted so the metadata is written in a reproducible order.
        foreach my $language (sort keys %text_and_language) {
            $doc_obj->add_utf8_metadata($section, "z_" . $language, $text_and_language{$language});
        }
    }
    elsif ($vision_type eq "enable_image_labelling") {
        my $labels = $decoded_json->{labelAnnotations} || [];

        foreach my $label (@{$labels}) {
            my $description = $label->{description};

            # write to metadata : 'description'='Book' 'score'='0.9' 'topicality'='0.9' 'mid'='/m/0123'
            $doc_obj->add_utf8_metadata($section, "description", $description);
            $doc_obj->add_utf8_metadata($section, "score", $label->{score});
            $doc_obj->add_utf8_metadata($section, "topicality", $label->{topicality});
            $doc_obj->add_utf8_metadata($section, "mid", $label->{mid});

            # write to metadata : 'descriptions'='Book' 'Book_score'='0.9' 'Book_topicality'='0.9' 'Book_mid'='/m/0123'
            $doc_obj->add_utf8_metadata($section, "descriptions", $description);
            $doc_obj->add_utf8_metadata($section, $description . "_score", $label->{score});
            $doc_obj->add_utf8_metadata($section, $description . "_topicality", $label->{topicality});
            $doc_obj->add_utf8_metadata($section, $description . "_mid", $label->{mid});
        }
    }

    return;
}
245
2461;
247
248
249
250
251
252
253
254
255
256
257
Note: See TracBrowser for help on using the repository browser.