Context Navigation

source: gs3-extensions/structured-image/trunk/perllib/plugins/GoogleVisionAPIConverter.pm@ 36989

Last change on this file since 36989 was 36989, checked in by davidb, 17 months ago
Initial work on developing a PagedImagePlugin that uses Google Vision API to OCR pages as part of the ingest process
File size: 9.7 KB

Line
1	######################################################################
2	#
3	# GoogleVisionAPIConverter.pm -- helper plugin that allows other plugins
4	# (such as ImagePlugin and PagedImagePlugin) to extend their
5	# processing capability through sub-classing inheritance (such as
6	# GoogleVisionImagePlugin and GoogleVisionPagedImagePlugin) to
7	# expand the image processing capabilities at ingest time to
8	# include the Google Vision API allowing for: metadata labelling
9	# of objects within a scene; and OCR text recognition.
10	#
11	# A component of the Greenstone digital library software
12	# from the New Zealand Digital Library Project at the
13	# University of Waikato, New Zealand.
14	#
15	# Copyright (C) 1999 New Zealand Digital Library Project
16	#
17	# This program is free software; you can redistribute it and/or modify
18	# it under the terms of the GNU General Public License as published by
19	# the Free Software Foundation; either version 2 of the License, or
20	# (at your option) any later version.
21	#
22	# This program is distributed in the hope that it will be useful,
23	# but WITHOUT ANY WARRANTY; without even the implied warranty of
24	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25	# GNU General Public License for more details.
26	#
27	# You should have received a copy of the GNU General Public License
28	# along with this program; if not, write to the Free Software
29	# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
30	#
31	###########################################################################
32
33	package GoogleVisionAPIConverter;
34
35	use strict;
36	no strict 'refs'; # allow filehandles to be variables and viceversa
37	no strict 'subs';
38
39	use gsprintf;
40	use FileUtils;
41
42	##use ImagePlugin;
43	use BaseMediaConverter;
44
45	use utf8;
46	use JSON qw( from_json );
47
# Set up inheritance at compile time: this converter is a subclass of
# BaseMediaConverter.
BEGIN {
    @GoogleVisionAPIConverter::ISA = qw(BaseMediaConverter);
}
51
# Command-line flags contributed by this converter.  All three are optional
# on/off switches; each 'desc' is a brace-wrapped key of the form
# {GoogleVisionAPIConverter.<flag name>} (presumably resolved to display
# text by the strings machinery -- confirm).
my $arguments = [
    map {
        +{ 'name' => $_,
           'desc' => '{GoogleVisionAPIConverter.' . $_ . '}',
           'type' => "flag",
           'reqd' => "no" }
    } qw(enable_image_labelling enable_image_ocr enable_document_ocr)
];

# Descriptor for the converter itself; 'args' points at the flag list above
# so that new() can register both with the caller's option lists.
my $options = {
    'name'     => "GoogleVisionAPIConverter",
    'desc'     => "{GoogleVisionAPIConverter.desc}",
    'abstract' => "no",
    'inherits' => "yes",
    'args'     => $arguments,
};
72
# Constructor.
#
# Arguments (after the class name):
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - passed through unchanged to the parent constructor
#   $hashArgOptLists - hash ref; this converter's flag definitions are
#                      appended to its "ArgList" entry and its option
#                      descriptor to its "OptList" entry
#
# Returns the object built by BaseMediaConverter, re-blessed into $class.
sub new {
    my $class = shift(@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;

    push(@{$pluginlist}, $class);

    push(@{ $hashArgOptLists->{"ArgList"} }, @{$arguments});
    push(@{ $hashArgOptLists->{"OptList"} }, $options);

    # Explicit class-method call rather than the indirect-object form
    # ("new BaseMediaConverter(...)"), which Perl has to guess how to parse.
    # The meaning of the trailing 1 is defined by BaseMediaConverter and is
    # not visible in this file.
    my $self = BaseMediaConverter->new($pluginlist, $inputargs, $hashArgOptLists, 1);

    return bless($self, $class);
}
85
# Start-of-run hook.
#
# The two OCR modes are mutually exclusive: if both enable_image_ocr and
# enable_document_ocr are set, usage help is written to STDERR and the
# process exits with status 2.  Otherwise the call is handed on to the
# parent class with the same arguments.
sub begin {
    my $self = shift(@_);
    # Remaining @_ is ($pluginfo, $base_dir, $processor, $maxdocs); it is
    # forwarded untouched to the parent class below.

    my $wants_image_ocr    = $self->{'enable_image_ocr'};
    my $wants_document_ocr = $self->{'enable_document_ocr'};

    if ($wants_image_ocr && $wants_document_ocr) {
        my @usage_lines = (
            "Please use the following command syntax for vision types: (--enable_image_ocr | --enable_document_ocr) [--enable_image_labelling]\n",
            "\t\t --enable_image_ocr : optical character recognition for text within images\n",
            "\t\t --enable_document_ocr : optical character recognition for text within documents\n",
            "\t\t --enable_image_labelling : annotation labeling for objects within images\n",
        );
        foreach my $usage_line (@usage_lines) {
            print STDERR $usage_line;
        }
        exit(2);
    }

    return $self->SUPER::begin(@_);
}
100
# Classify one line of output.
#
# Returns the two-element list ($had_error, $generate_dot):
#   $had_error    - always 0; no line is ever treated as an error here
#   $generate_dot - 1 when the line matches /^.*$/ (anything without an
#                   embedded newline; one trailing newline is allowed),
#                   otherwise 0
#
# Not called from within this file; presumably used as a progress-monitor
# callback while the external command runs -- confirm against the caller.
sub vision_monitor_line {
    my $line = $_[0];

    my $had_error    = 0;
    my $generate_dot = ($line =~ m/^.*$/) ? 1 : 0;

    return ($had_error, $generate_dot);
}
114
# Run the Google Vision processing that has been switched on for one file.
#
# Arguments (after $self):
#   $filename - path of the file handed to vision.py (also used to set up
#               the cache location via init_cache_for_file)
#   $file     - name used in progress messages by run_vision
#   $doc_obj  - document object that receives the results
#
# For each enabled mode a "vision.py" command line is built and passed to
# run_vision together with the JSON output location inside the cache
# directory.  With a single mode the output is "google-vision-output.json";
# with two modes each output is prefixed with the mode name and a dash.
# Any other number of enabled modes results in no vision call at all.
#
# Always returns the string "json".
sub run_convert {
    my ($self, $filename, $file, $doc_obj) = @_;

    my $section = $doc_obj->get_top_section();

    my $outhandle = $self->{'outhandle'};
    print $outhandle "----- GoogleVisionAPIConveter run_convert -----\n";

    # Enabled modes, always in this fixed order: labelling, image OCR,
    # document OCR.
    my @vision_type = grep { $self->{$_} }
        qw(enable_image_labelling enable_image_ocr enable_document_ocr);
    my $vision_type_count = scalar(@vision_type);

    if ($vision_type_count != 0) {
        $self->init_cache_for_file($filename);
        my $cached_image_dir = $self->{'cached_dir'};

        my $ofile = "google-vision-output.json";

        # Credentials file is looked up under the collection's etc directory.
        my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
        my $credentials_filename
            = &FileUtils::filenameConcatenate($collect_dir, "etc", "atea-storage-cd63a39dfeb5.json");

        if ($vision_type_count == 1 || $vision_type_count == 2) {
            foreach my $current_type (@vision_type) {
                # Only the two-mode case needs distinct output files.
                my $output_leaf = ($vision_type_count == 1)
                    ? "google-vision-output.json"
                    : $current_type . "-google-vision-output.json";
                my $ofilename = &FileUtils::filenameConcatenate($cached_image_dir, $output_leaf);

                my $vision_cmd = "vision.py --$current_type --credentials \"$credentials_filename\" \"$filename\" \"$ofilename\"";

                $self->run_vision($file, $filename, $ofile, $ofilename, $vision_cmd, $current_type, $section, $doc_obj);
            }
        }
    }

    return "json";
}
169
# Run one Google Vision request (through the cached-command machinery) and
# fold the JSON it produced into the document.
#
# Arguments (after $self):
#   $file        - name shown in the progress message
#   $filename    - input path handed to run_cached_general_cmd
#   $ofile       - base name used when associating the JSON with the document
#   $ofilename   - path of the JSON file the command writes
#   $vision_cmd  - complete command line to execute
#   $vision_type - "enable_image_ocr", "enable_document_ocr" or
#                  "enable_image_labelling"
#   $section     - document section the JSON file is associated with
#   $doc_obj     - document object receiving text and metadata
#
# Effects on $doc_obj:
#   * the JSON file is associated as "<vision_type><ofile>"
#   * OCR modes: the description of the first textAnnotations entry is
#     appended as text, and the words on the first page of
#     fullTextAnnotation are grouped by detected language into "z_<lang>"
#     metadata ("z_no_lang" when no language was detected)
#   * labelling mode: each labelAnnotations entry is written both as generic
#     description/score/topicality/mid metadata and as
#     descriptions/<description>_score/... metadata
#
# Dies if the JSON output file cannot be opened.  Returns nothing useful.
sub run_vision {
    use Data::Dumper; # debugging aid kept from the original; unused here
    my $self = shift(@_);
    my ($file, $filename, $ofile, $ofilename, $vision_cmd, $vision_type, $section, $doc_obj) = @_;

    my $print_info = { 'message_prefix' => "GoogleVisionAPI",
                       'message' => "Sending $file to GoogleVisionAPI using vision.py" };

    # NOTE(review): none of the three returned values is acted on; a failed
    # command only surfaces when the output file cannot be opened below.
    my ($vision_regenerated, $vision_result, $vision_error)
        = $self->run_cached_general_cmd($vision_cmd, $filename, $ofilename, $print_info);

    $doc_obj->associate_file($ofilename, $vision_type . $ofile, "application/json", $section);

    # Slurp the whole JSON file as decoded characters.
    my $json_text = do {
        open(my $json_fh, "<:encoding(UTF-8)", $ofilename)
            or die("Can't open \"$ofilename\": $!\n");
        local $/;
        <$json_fh>
    };

    my $cursection = $doc_obj->get_top_section(); # text and metadata go on the top section

    my $decoded_json = from_json($json_text);

    if ($vision_type eq "enable_document_ocr" || $vision_type eq "enable_image_ocr") {
        # The first text annotation holds the full OCR content.  It is
        # absent when no text was found, so only append when present.
        my $annotations = $decoded_json->{textAnnotations};
        my $ocr_text;
        if (ref($annotations) eq 'ARRAY' && ref($annotations->[0]) eq 'HASH') {
            $ocr_text = $annotations->[0]{description};
        }
        if (defined $ocr_text) {
            $doc_obj->add_utf8_text($cursection, $ocr_text);
        }

        # Walk down to the blocks of the first page without assuming any of
        # the intermediate levels exist.
        my $full_text  = $decoded_json->{fullTextAnnotation};
        my $pages      = (ref($full_text) eq 'HASH')  ? $full_text->{pages}   : undef;
        my $first_page = (ref($pages) eq 'ARRAY')     ? $pages->[0]           : undef;
        my $blocks     = (ref($first_page) eq 'HASH') ? $first_page->{blocks} : undef;

        my %text_and_language;
        foreach my $block (@{ $blocks || [] }) {
            foreach my $paragraph (@{ $block->{paragraphs} || [] }) {
                foreach my $word (@{ $paragraph->{words} || [] }) {
                    my $detected_language
                        = $word->{property}{detectedLanguages}[0]{languageCode} || "no_lang";
                    # A word is delivered as individual symbols; glue them
                    # back together.
                    my $word_text = join('',
                        map { defined $_->{text} ? $_->{text} : () }
                        @{ $word->{symbols} || [] });
                    $text_and_language{$detected_language} .= $word_text . " ";
                }
            }
        }

        # Sorted so that the metadata order is the same on every run.
        foreach my $language (sort keys %text_and_language) {
            $doc_obj->add_utf8_metadata($cursection, "z_" . $language, $text_and_language{$language});
        }
    }
    elsif ($vision_type eq "enable_image_labelling") {
        my $labels = $decoded_json->{labelAnnotations};
        foreach my $label (@{ (ref($labels) eq 'ARRAY') ? $labels : [] }) {
            # write to metadata : 'description'='Book' 'score'='0.9' 'topicality'='0.9' 'mid'='/m/0123'
            $doc_obj->add_utf8_metadata($cursection, "description", $label->{description});
            $doc_obj->add_utf8_metadata($cursection, "score", $label->{score});
            $doc_obj->add_utf8_metadata($cursection, "topicality", $label->{topicality});
            $doc_obj->add_utf8_metadata($cursection, "mid", $label->{mid});
            # write to metadata : 'descriptions'='Book' 'Book_score'='0.9' 'Book_topicality'='0.9' 'Book_mid'='/m/0123'
            $doc_obj->add_utf8_metadata($cursection, "descriptions", $label->{description});
            $doc_obj->add_utf8_metadata($cursection, $label->{description} . "_score", $label->{score});
            $doc_obj->add_utf8_metadata($cursection, $label->{description} . "_topicality", $label->{topicality});
            $doc_obj->add_utf8_metadata($cursection, $label->{description} . "_mid", $label->{mid});
        }
    }

    return;
}
239
240	1;
241
242
243
244
245
246
247
248
249
250
251

Note: See TracBrowser for help on using the repository browser.

Download in other formats: