Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: gs3-extensions/structured-image/trunk/perllib/plugins/GoogleVisionImagePlugin.pm@ 36247

Last change on this file since 36247 was 36247, checked in by davidb, 23 months ago
initial cut at files to provide google vision api processing of images using the GoogleVisionImagePlugin
File size: 10.2 KB

Line
1	######################################################################
2	#
3	# GoogleVisionImagePlugin.pm -- plugin that extends the capability of
4	# ImagePlugin to use Google Vision API allowing for: metadata labelling
5	# of objects within a scene; OCR text recognition.
6	# A component of the Greenstone digital library software
7	# from the New Zealand Digital Library Project at the
8	# University of Waikato, New Zealand.
9	#
10	# Copyright (C) 1999 New Zealand Digital Library Project
11	#
12	# This program is free software; you can redistribute it and/or modify
13	# it under the terms of the GNU General Public License as published by
14	# the Free Software Foundation; either version 2 of the License, or
15	# (at your option) any later version.
16	#
17	# This program is distributed in the hope that it will be useful,
18	# but WITHOUT ANY WARRANTY; without even the implied warranty of
19	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20	# GNU General Public License for more details.
21	#
22	# You should have received a copy of the GNU General Public License
23	# along with this program; if not, write to the Free Software
24	# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25	#
26	###########################################################################
27
28	package GoogleVisionImagePlugin;
29
30	use strict;
31	no strict 'refs'; # allow filehandles to be variables and viceversa
32	no strict 'subs';
33
34	use gsprintf;
35	use FileUtils;
36
37	use ImagePlugin;
38	use BaseMediaConverter;
39
40	use utf8;
41	use JSON qw( from_json );
42
# Set up inheritance at compile time. Method lookup tries ImagePlugin
# first and then BaseMediaConverter.
sub BEGIN {
    @GoogleVisionImagePlugin::ISA = qw(ImagePlugin BaseMediaConverter);
}
46
# The three optional flags this plugin adds on top of its parent plugins.
# Each 'desc' is a resource key in curly braces rather than literal text.
my $image_labelling_flag = {
    'name' => "enable_image_labelling",
    'desc' => "{GoogleVisionImagePlugin.enable_image_labelling}",
    'type' => "flag",
    'reqd' => "no",
};

my $image_ocr_flag = {
    'name' => "enable_image_ocr",
    'desc' => "{GoogleVisionImagePlugin.enable_image_ocr}",
    'type' => "flag",
    'reqd' => "no",
};

my $document_ocr_flag = {
    'name' => "enable_document_ocr",
    'desc' => "{GoogleVisionImagePlugin.enable_document_ocr}",
    'type' => "flag",
    'reqd' => "no",
};

# Argument list registered by new(); the order matches the declaration
# order above.
my $arguments = [ $image_labelling_flag, $image_ocr_flag, $document_ocr_flag ];

# Plugin description record registered by new() alongside the argument list.
my $options = {
    'name'     => "GoogleVisionImagePlugin",
    'desc'     => "{GoogleVisionImagePlugin.desc}",
    'abstract' => "no",
    'inherits' => "yes",
    'args'     => $arguments,
};
67
# Constructor.
#
# Parameters:
#   $class           - class name to bless the new object into
#   $pluginlist      - array ref; $class is appended to it
#   $inputargs       - passed through unchanged to both parent constructors
#   $hashArgOptLists - hash ref; this plugin's argument list is appended to
#                      its "ArgList" entry and its option record to "OptList"
#
# Returns the object built by ImagePlugin's constructor, re-blessed into
# $class.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    # Explicit Class->new(...) calls replace the indirect-object form
    # ("new Class(...)"), whose parsing depends on which names Perl has
    # already seen at compile time.
    #
    # The BaseMediaConverter object is deliberately discarded, exactly as
    # before; only the ImagePlugin object becomes $self.
    # NOTE(review): the trailing 1 presumably marks this as an auxiliary
    # construction -- confirm against BaseMediaConverter::new.
    BaseMediaConverter->new($pluginlist, $inputargs, $hashArgOptLists, 1);
    my $self = ImagePlugin->new($pluginlist, $inputargs, $hashArgOptLists);

    return bless $self, $class;
}
81
# Validate the flag combination, then hand over to the parent class.
#
# The two OCR modes are mutually exclusive. When both are enabled, a usage
# summary is written to STDERR and the process exits with status 2.
# Otherwise all arguments are forwarded unchanged to SUPER::begin and its
# result is returned.
sub begin {
    my $self = shift (@_);

    my $both_ocr_modes = $self->{'enable_image_ocr'} && $self->{'enable_document_ocr'};

    if ($both_ocr_modes) {
        my @usage_lines = (
            "Please use the following command syntax for vision types: (--enable_image_ocr | --enable_document_ocr) [--enable_image_labelling]\n",
            "\t\t --enable_image_ocr : optical character recognition for text within images\n",
            "\t\t --enable_document_ocr : optical character recognition for text within documents\n",
            "\t\t --enable_image_labelling : annotation labeling for objects within images\n",
        );
        foreach my $usage_line (@usage_lines) {
            print STDERR $usage_line;
        }
        exit(2);
    }

    return $self->SUPER::begin(@_);
}
96
# Classify one line of output.
#
# Returns the two-element list ($had_error, $generate_dot). $had_error is
# always 0. $generate_dot is 1 when the line matches /^.*$/ (any text with
# no newline other than an optional trailing one) and 0 otherwise.
sub vision_monitor_line {
    my ($line) = @_;

    my $generate_dot = ($line =~ m/^.*$/) ? 1 : 0;

    return (0, $generate_dot);
}
110
# Run vision.py once for every enabled vision type and let run_vision()
# fold each JSON result into the document.
#
# Parameters:
#   $base_dir - accepted for interface compatibility; not used here
#   $filename - path of the source image; passed to init_cache_for_file(),
#               placed in the vision.py command line and passed to run_vision()
#   $file     - passed through to run_vision()
#   $doc_obj  - document object; its top section is passed to run_vision()
#
# Returns the string "json" in every case, including when no vision type
# is enabled.
#
# Output naming: with exactly one enabled type the cached result is
# "google-vision-output.json"; with several it is
# "<type>-google-vision-output.json" per type, so results never overwrite
# each other. Any number of enabled types is handled; the earlier
# hand-written branches covered only one or two and silently did nothing
# for three.
sub run_convert {
    my $self = shift (@_);
    my ($base_dir, $filename, $file, $doc_obj) = @_;

    my $section = $doc_obj->get_top_section();

    my $outhandle = $self->{'outhandle'};
    print $outhandle "----- GoogleVisionImagePlugin run_convert -----\n";

    # Enabled vision types, in a fixed order: labelling, image OCR, document OCR.
    my @vision_types = grep { $self->{$_} }
        qw(enable_image_labelling enable_image_ocr enable_document_ocr);

    if (@vision_types) {
        $self->init_cache_for_file($filename);
        my $cached_image_dir = $self->{'cached_dir'};

        my $ofile = "google-vision-output.json";
        my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
        # NOTE(review): the credentials file name is hard-coded; consider
        # making it a plugin option.
        my $credentials_filename = &FileUtils::filenameConcatenate($collect_dir, "etc", "atea-storage-cd63a39dfeb5.json");

        my $single_type = (scalar(@vision_types) == 1);

        foreach my $vision_type (@vision_types) {
            my $cache_leaf = $single_type ? $ofile : $vision_type . "-" . $ofile;
            my $ofilename = &FileUtils::filenameConcatenate($cached_image_dir, $cache_leaf);

            # NOTE(review): paths are only wrapped in double quotes, so a
            # path containing '"', '$' or a backtick would not survive the
            # shell intact.
            my $vision_cmd = "vision.py --$vision_type --credentials \"$credentials_filename\" \"$filename\" \"$ofilename\"";

            $self->run_vision($file, $filename, $ofile, $ofilename, $vision_cmd, $vision_type, $section, $doc_obj);
        }
    }

    return "json";
}
165
# Run one vision.py command (through the conversion cache), attach the JSON
# result to the document, and copy the interesting parts of the result into
# the document's text and metadata.
#
# Parameters:
#   $file        - file name used only in the progress message
#   $filename    - passed to run_cached_general_cmd() as the input file
#   $ofile       - base name for the associated file; the associated name
#                  is $vision_type . $ofile (no separator is inserted)
#   $ofilename   - path of the JSON file the command is expected to produce
#   $vision_cmd  - complete command line to run
#   $vision_type - "enable_image_ocr", "enable_document_ocr" or
#                  "enable_image_labelling"; any other value only
#                  associates the file
#   $section     - section the JSON file is associated with
#   $doc_obj     - document object receiving text and metadata
#
# Dies if $ofilename cannot be opened, or (from from_json) if its content
# is not valid JSON.
#
# OCR types: textAnnotations[0].description, when present, is appended to
# the top section's text. Every word on every page of fullTextAnnotation is
# then grouped by its first detected language code ("no_lang" when absent)
# and stored as metadata "z_<language>", words separated by a space.
#
# Labelling type: each entry of labelAnnotations is stored twice, once
# under the generic names description/score/topicality/mid and once under
# "descriptions" plus "<description>_score", "<description>_topicality"
# and "<description>_mid".
sub run_vision {
    my $self = shift (@_);
    my ($file, $filename, $ofile, $ofilename, $vision_cmd, $vision_type, $section, $doc_obj) = @_;

    my $print_info = { 'message_prefix' => "GoogleVisionAPI",
                       'message' => "Sending $file to GoogleVisionAPI using vision.py" };

    # The three results are not acted upon here: a failed run surfaces
    # below, when the output file cannot be opened.
    my ($vision_regenerated, $vision_result, $vision_error)
        = $self->run_cached_general_cmd($vision_cmd, $filename, $ofilename, $print_info);

    $doc_obj->associate_file($ofilename, $vision_type . $ofile, "application/json", $section);

    # Slurp the whole JSON file as decoded characters, which is what
    # from_json() expects.
    my $json_text = do {
        open(my $json_fh, "<:encoding(UTF-8)", $ofilename)
            or die("Can't open \"$ofilename\": $!\n");
        local $/;
        my $content = <$json_fh>;
        close($json_fh);
        $content;
    };

    my $cursection = $doc_obj->get_top_section(); # text and metadata go on the top section

    my $decoded_json = from_json($json_text);

    if ($vision_type eq "enable_document_ocr" || $vision_type eq "enable_image_ocr") {
        # A response for an image without any text has no textAnnotations;
        # in that case there is nothing to append.
        my $text_annotations = $decoded_json->{textAnnotations};
        my $ocr_text;
        if (ref($text_annotations) eq 'ARRAY' && @{$text_annotations}) {
            $ocr_text = $text_annotations->[0]{description}; # full OCR content
        }
        if (defined $ocr_text) {
            $doc_obj->add_utf8_text($cursection, $ocr_text);
        }

        my %text_and_language;
        my $full_text = $decoded_json->{fullTextAnnotation} || {};

        # Walk every page, not just the first, so multi-page results are
        # covered; a single-page result is handled exactly as before.
        foreach my $page (@{ $full_text->{pages} || [] }) {
            foreach my $block (@{ $page->{blocks} || [] }) {
                foreach my $paragraph (@{ $block->{paragraphs} || [] }) {
                    foreach my $word (@{ $paragraph->{words} || [] }) {
                        my $detected_language = $word->{property}{detectedLanguages}[0]{languageCode} || "no_lang";
                        my $word_text = "";
                        foreach my $letter (@{ $word->{symbols} || [] }) {
                            $word_text .= $letter->{text} if defined $letter->{text};
                        }
                        $text_and_language{$detected_language} .= $word_text . " ";
                    }
                }
            }
        }

        # Sorted only to make the order in which metadata is added repeatable.
        foreach my $language (sort keys %text_and_language) {
            $doc_obj->add_utf8_metadata($cursection, "z_" . $language, $text_and_language{$language});
        }
    }
    elsif ($vision_type eq "enable_image_labelling") {
        foreach my $label (@{ $decoded_json->{labelAnnotations} || [] }) {
            my $description = $label->{description};

            # write to metadata : 'description'='Book' 'score'='0.9' 'topicality'='0.9' 'mid'='/m/0123'
            $doc_obj->add_utf8_metadata($cursection, "description", $description);
            $doc_obj->add_utf8_metadata($cursection, "score", $label->{score});
            $doc_obj->add_utf8_metadata($cursection, "topicality", $label->{topicality});
            $doc_obj->add_utf8_metadata($cursection, "mid", $label->{mid});

            # write to metadata : 'descriptions'='Book' 'Book_score'='0.9' 'Book_topicality'='0.9' 'Book_mid'='/m/0123'
            $doc_obj->add_utf8_metadata($cursection, "descriptions", $description);
            $doc_obj->add_utf8_metadata($cursection, $description . "_score", $label->{score});
            $doc_obj->add_utf8_metadata($cursection, $description . "_topicality", $label->{topicality});
            $doc_obj->add_utf8_metadata($cursection, $description . "_mid", $label->{mid});
        }
    }

    return;
}
235
# Plugin-specific processing of doc_obj.
#
# Resolves the file's full path, derives its renamed (per the
# 'file_rename_method' setting) UTF-8 file name, runs the Google Vision
# step via run_convert(), and then delegates to the parent class's
# process() with the original argument list, returning its result.
#
# Arguments after $self: ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli).
# Only $base_dir, $file and $doc_obj are used here.
sub process {
    my $self = shift (@_);
    my ($base_dir, $file, $doc_obj) = @_[1, 2, 4];

    my ($full_path, $leaf_name) = &util::get_full_filenames($base_dir, $file);
    my $utf8_leaf_name = $self->filepath_to_utf8($leaf_name);
    my $renamed_leaf = &util::rename_file($utf8_leaf_name, $self->{'file_rename_method'});

    $self->run_convert($base_dir, $full_path, $renamed_leaf, $doc_obj);

    return $self->SUPER::process(@_);
}
250
251	1;
252
253
254
255
256
257
258
259
260
261
262

Note: See TracBrowser for help on using the repository browser.

Download in other formats: