Context Navigation

source: gs3-extensions/structured-image/trunk/perllib/plugins/GoogleVisionAPIConverter.pm@ 37010

Last change on this file since 37010 was 37010, checked in by davidb, 17 months ago
Changes to make it easier to display OCR bounding-box info in PagedImage doc view
File size: 11.0 KB

Line
1	######################################################################
2	#
3	# GoogleVisionAPIConverter.pm -- helper plugin that allows other plugins
4	# (such as ImagePlugin and PagedImagePlugin) to extend their
5	# processing capability through sub-classing inheritance (such as
6	# GoogleVisionImagePlugin and GoogleVisionPagedImagePlugin) to
7	# expand the image processing capabilities at ingest time to
8	# include the Google Vision API allowing for: metadata labelling
9	# of objects within a scene; and OCR text recognition.
10	#
11	# A component of the Greenstone digital library software
12	# from the New Zealand Digital Library Project at the
13	# University of Waikato, New Zealand.
14	#
15	# Copyright (C) 1999 New Zealand Digital Library Project
16	#
17	# This program is free software; you can redistribute it and/or modify
18	# it under the terms of the GNU General Public License as published by
19	# the Free Software Foundation; either version 2 of the License, or
20	# (at your option) any later version.
21	#
22	# This program is distributed in the hope that it will be useful,
23	# but WITHOUT ANY WARRANTY; without even the implied warranty of
24	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25	# GNU General Public License for more details.
26	#
27	# You should have received a copy of the GNU General Public License
28	# along with this program; if not, write to the Free Software
29	# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
30	#
31	###########################################################################
32
33	package GoogleVisionAPIConverter;
34
35	use strict;
36	no strict 'refs'; # allow filehandles to be variables and viceversa
37	no strict 'subs';
38
39	use gsprintf;
40	use FileUtils;
41
42	##use ImagePlugin;
43	use BaseMediaConverter;
44
45	use utf8;
46	use JSON qw( from_json );
47
# Set up the inheritance chain at compile time: this converter derives
# from BaseMediaConverter.
BEGIN {
    @GoogleVisionAPIConverter::ISA = qw(BaseMediaConverter);
}
51
# Argument table this converter contributes to whichever plugin inherits
# from it. new() appends these entries to the shared "ArgList".
#   google_application_credentials : file name of the service-account key;
#                                    run_gv_convert() looks for it in the
#                                    collection's "etc" directory.
#   enable_image_labelling         : flag, request label annotations.
#   enable_image_ocr               : flag, request image OCR.
#   enable_document_ocr            : flag, request document OCR.
my $arguments = [
    {
        'name' => 'google_application_credentials',
        # NOTE(review): "applicatio" looks like a typo for "application";
        # left as-is because the matching resource key is not visible
        # from this file -- confirm before renaming.
        'desc' => '{GoogleVisionAPIConverter.google_applicatio_credentials}',
        'type' => 'string',
        'reqd' => 'no',
        'deft' => 'google-sa-credentials-key.json',
    },
    {
        'name' => 'enable_image_labelling',
        'desc' => '{GoogleVisionAPIConverter.enable_image_labelling}',
        'type' => 'flag',
        'reqd' => 'no',
    },
    {
        'name' => 'enable_image_ocr',
        'desc' => '{GoogleVisionAPIConverter.enable_image_ocr}',
        'type' => 'flag',
        'reqd' => 'no',
    },
    {
        'name' => 'enable_document_ocr',
        'desc' => '{GoogleVisionAPIConverter.enable_document_ocr}',
        'type' => 'flag',
        'reqd' => 'no',
    },
];

# Option block describing this converter; new() appends it to the shared
# "OptList".
my $options = {
    'name'     => 'GoogleVisionAPIConverter',
    'desc'     => '{GoogleVisionAPIConverter.desc}',
    'abstract' => 'no',
    'inherits' => 'yes',
    'args'     => $arguments,
};
78
# Constructor.
#
# Parameters:
#   $class           - class name to bless the new object into
#   $pluginlist      - array ref; $class is appended to it
#   $inputargs       - passed through unchanged to BaseMediaConverter->new
#   $hashArgOptLists - hash ref; this package's argument table is appended
#                      to its "ArgList" entry and its option block to its
#                      "OptList" entry
#
# Returns the object built by BaseMediaConverter->new, re-blessed into
# $class.
sub new {
    my ($class) = shift(@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;

    push(@$pluginlist, $class);

    push(@{ $hashArgOptLists->{"ArgList"} }, @{$arguments});
    push(@{ $hashArgOptLists->{"OptList"} }, $options);

    # Explicit class-method call. The indirect-object form
    # ("new BaseMediaConverter(...)") relies on parser heuristics, which is
    # fragile inside a package that defines its own new().
    # NOTE(review): the meaning of the trailing 1 is defined by
    # BaseMediaConverter->new, which is not visible here -- confirm.
    my $self = BaseMediaConverter->new($pluginlist, $inputargs, $hashArgOptLists, 1);

    return bless $self, $class;
}
91
# Start-of-run hook.
#
# Image OCR and document OCR cannot both be enabled: if both flags are
# set, a usage summary is written to STDERR and the process exits with
# status 2. Otherwise the call is handed on to the parent class with the
# same arguments.
sub begin {
    my $self = shift(@_);
    my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;

    my $both_ocr_modes_set = ($self->{'enable_image_ocr'} && $self->{'enable_document_ocr'});

    if ($both_ocr_modes_set) {
        my @usage_lines = (
            "Please use the following command syntax for vision types: (--enable_image_ocr | --enable_document_ocr) [--enable_image_labelling]\n",
            "\t\t --enable_image_ocr : optical character recognition for text within images\n",
            "\t\t --enable_document_ocr : optical character recognition for text within documents\n",
            "\t\t --enable_image_labelling : annotation labeling for objects within images\n",
        );
        foreach my $usage_line (@usage_lines) {
            print STDERR $usage_line;
        }
        exit(2);
    }

    # $self has already been shifted off, so @_ holds only the hook arguments
    return $self->SUPER::begin(@_);
}
106
# Line-monitor callback for output coming from the vision command.
#
# Parameter:
#   $line - one line of command output
#
# Returns the two-element list ($had_error, $generate_dot).
# $had_error is always 0. $generate_dot is 1 whenever the pattern below
# matches, i.e. for any string without an embedded newline.
sub vision_monitor_line {
    my ($line) = @_;

    my $had_error    = 0;
    my $generate_dot = ($line =~ m/^.*$/) ? 1 : 0;

    return ($had_error, $generate_dot);
}
120
# Run the Google Vision helper script (vision.py) over one image, once
# per enabled vision type, and hand each JSON result to run_vision().
#
# Parameters:
#   $filename    - path of the image; passed to vision.py and to the cache
#   $file        - name used in progress messages
#   $doc_obj     - document object that receives text/metadata/files
#   $opt_section - optional section; defaults to $doc_obj's top section
#
# Output naming (kept as-is so existing cached files remain valid):
#   one type enabled  -> <cached_dir>/google-vision-output.json
#   two types enabled -> <cached_dir>/<type>-google-vision-output.json
#
# More than two enabled types can only happen when both OCR modes are
# set, which begin() rejects. That case used to fall through silently; it
# now prints a message to the output handle and runs nothing.
#
# Always returns the string "json".
sub run_gv_convert {
    my $self = shift(@_);
    my ($filename, $file, $doc_obj, $opt_section) = @_;

    my $section = (defined $opt_section) ? $opt_section : $doc_obj->get_top_section();

    my $outhandle = $self->{'outhandle'};
    print $outhandle "----- GoogleVisionAPIConveter run_gv_convert -----\n";

    # Enabled vision types, in the order they are processed
    my @vision_types = grep { $self->{$_} }
        ("enable_image_labelling", "enable_image_ocr", "enable_document_ocr");
    my $vision_type_count = scalar(@vision_types);

    if ($vision_type_count != 0) {

        $self->init_cache_for_file($filename);
        my $cached_image_dir = $self->{'cached_dir'};

        my $ofile = "google-vision-output.json";
        my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
        my $credentials_filename
            = &FileUtils::filenameConcatenate($collect_dir, "etc", $self->{'google_application_credentials'});

        if ($vision_type_count > 2) {
            print $outhandle "GoogleVisionAPIConverter: more than two vision types enabled ("
                . join(", ", @vision_types)
                . "); image and document OCR are mutually exclusive, so no Google Vision processing was run for $file\n";
        }
        else {
            foreach my $vision_type (@vision_types) {
                # With two types each result needs its own cache file,
                # so the type name is used as a prefix.
                my $ofile_tail = ($vision_type_count == 1)
                    ? $ofile
                    : $vision_type . "-" . $ofile;
                my $ofilename = &FileUtils::filenameConcatenate($cached_image_dir, $ofile_tail);

                # NOTE(review): the paths are only wrapped in double
                # quotes; a path containing a double quote would break
                # the command line.
                my $vision_cmd
                    = "vision.py --$vision_type --credentials \"$credentials_filename\" \"$filename\" \"$ofilename\"";

                $self->run_vision($file, $filename, $ofile, $ofilename, $vision_cmd, $vision_type, $section, $doc_obj);
            }
        }
    }

    return "json";
}
176
# Run one vision command through the cache, attach the resulting JSON
# file to the document, and turn its content into text and metadata.
#
# Parameters:
#   $file        - name used in the progress message
#   $filename    - input path handed to run_cached_general_cmd
#   $ofile       - base name used to build the associated-file name
#   $ofilename   - path of the JSON file the command writes
#   $vision_cmd  - command line to execute
#   $vision_type - "enable_image_ocr", "enable_document_ocr" or
#                  "enable_image_labelling"
#   $section     - document section to add to
#   $doc_obj     - document object receiving text/metadata/files
#
# Dies if the JSON output cannot be opened (from_json also dies on
# invalid JSON).
#
# OCR types: appends the full OCR text to the section (skipped when the
# response has no text), adds one "z_<lang>" metadata value per detected
# language (in sorted order; "z_no_lang" when none was detected), then
# GV{Image,Document}OCRJSON and HasGoogleVision{Image,Document}OCRJSON.
# Labelling: adds per-label metadata, then
# HasGoogleVisionImageLabellingJSON and GVImageLabellingJSON.
sub run_vision {
    use Data::Dumper; # not used below; kept because `use` acts file-wide
    my $self = shift(@_);
    my ($file, $filename, $ofile, $ofilename, $vision_cmd, $vision_type, $section, $doc_obj) = @_;

    my $print_info = { 'message_prefix' => "GoogleVisionAPI",
                       'message' => "Sending $file to GoogleVisionAPI using vision.py" };

    # NOTE(review): none of the three returned values is inspected; a
    # failed command only surfaces when the output file cannot be opened
    # below -- confirm whether $vision_error should be acted on.
    my ($vision_regenerated, $vision_result, $vision_error)
        = $self->run_cached_general_cmd($vision_cmd, $filename, $ofilename, $print_info);

    # Need to work a bit harder in setting up the associated JSON file
    #  => strip off 'enable_' in favour of 'gv_'
    #  => add in section number as part of the file name to avoid clashes

    my $gv_assoc_prefix = $vision_type;
    $gv_assoc_prefix =~ s/^enable_/gv_/;

    my $section_file_suffix = $section;
    $section_file_suffix =~ s/\./_/g;

    my $assoc_ofile = $gv_assoc_prefix.$ofile;
    $assoc_ofile =~ s/\.(.*?)$/$section_file_suffix.$1/;

    $doc_obj->associate_file($ofilename, $assoc_ofile, "application/json", $section);

    my $json_text = do { # read in json file
        open(my $json_fh, "<:encoding(UTF-8)", $ofilename)
            or die("Can't open \"$ofilename\": $!\n");
        local $/;
        <$json_fh>
    };

    my $decoded_json = from_json($json_text);

    if ($vision_type eq "enable_document_ocr" || $vision_type eq "enable_image_ocr") {
        # Full OCR content. An image with no recognisable text yields a
        # response without textAnnotations, so only append when present.
        my $ocr_text = $decoded_json->{textAnnotations}[0]{description};
        if (defined $ocr_text) {
            $doc_obj->add_utf8_text($section, $ocr_text); # append text to section
        }

        # Rebuild the words from their symbols and group them by the
        # language detected for each word.
        my $blocks = $decoded_json->{fullTextAnnotation}{pages}[0]{blocks};
        my %text_and_language;

        foreach my $block (@{ $blocks }) {
            foreach my $paragraph (@{ $block->{paragraphs} }) {
                foreach my $word (@{ $paragraph->{words} }) {
                    my $detected_language = $word->{property}{detectedLanguages}[0]{languageCode} || "no_lang";
                    my $word_text = "";
                    foreach my $letter (@{ $word->{symbols} }) {
                        $word_text .= $letter->{text};
                    }
                    $text_and_language{$detected_language} .= $word_text . " ";
                }
            }
        }

        # Sorted so that metadata is added in the same order on every run
        foreach my $language (sort keys %text_and_language) {
            $doc_obj->add_utf8_metadata($section, "z_" . $language, $text_and_language{$language});
        }

        my $ocr_json_kind = ($vision_type eq "enable_document_ocr")
            ? "DocumentOCRJSON"
            : "ImageOCRJSON";

        $doc_obj->add_utf8_metadata($section, "GV" . $ocr_json_kind, $assoc_ofile);
        $doc_obj->add_utf8_metadata($section, "HasGoogleVision" . $ocr_json_kind, 1);
    }
    elsif ($vision_type eq "enable_image_labelling") {
        my $labels = $decoded_json->{labelAnnotations};
        foreach my $label (@{ $labels }) {
            # write to metadata : 'description'='Book' 'score'='0.9' 'topicality'='0.9' 'mid'='/m/0123'
            $doc_obj->add_utf8_metadata($section, "description", $label->{description});
            $doc_obj->add_utf8_metadata($section, "score", $label->{score});
            $doc_obj->add_utf8_metadata($section, "topicality", $label->{topicality});
            $doc_obj->add_utf8_metadata($section, "mid", $label->{mid});

            # write to metadata : 'descriptions'='Book' 'Book_score'='0.9' 'Book_topicality'='0.9' 'Book_mid'='/m/0123'
            $doc_obj->add_utf8_metadata($section, "descriptions", $label->{description});
            $doc_obj->add_utf8_metadata($section, $label->{description} . "_score", $label->{score});
            $doc_obj->add_utf8_metadata($section, $label->{description} . "_topicality", $label->{topicality});
            $doc_obj->add_utf8_metadata($section, $label->{description} . "_mid", $label->{mid});
        }

        $doc_obj->add_utf8_metadata($section, "HasGoogleVisionImageLabellingJSON", 1);
        $doc_obj->add_utf8_metadata($section, "GVImageLabellingJSON", $assoc_ofile);
    }
}
279
280	1;
281
282
283
284
285
286
287
288
289
290
291

Note: See TracBrowser for help on using the repository browser.

Download in other formats: