source: gs3-extensions/structured-image/trunk/perllib/plugins/GoogleVisionAPIConverter.pm@ 37060

Last change on this file since 37060 was 37060, checked in by davidb, 16 months ago

Removed the 'allinone' version of functions

File size: 19.9 KB
Line 
1######################################################################
2#
3# GoogleVisionAPIConverter.pm -- helper plugin that allows other plugins
4# (such as ImagePlugin and PagedImagePlugin) to extend their
5# processing capability through sub-classing inheritance (such as
6# GoogleVisionImagePlugin and GoogleVisionPagedImagePlugin) to
7# expand the image processing capabilities at ingest time to
8# include the Google Vision API allowing for: metadata labelling
9# of objects within a scene; and OCR text recognition.
10#
11# A component of the Greenstone digital library software
12# from the New Zealand Digital Library Project at the
13# University of Waikato, New Zealand.
14#
15# Copyright (C) 1999 New Zealand Digital Library Project
16#
17# This program is free software; you can redistribute it and/or modify
18# it under the terms of the GNU General Public License as published by
19# the Free Software Foundation; either version 2 of the License, or
20# (at your option) any later version.
21#
22# This program is distributed in the hope that it will be useful,
23# but WITHOUT ANY WARRANTY; without even the implied warranty of
24# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25# GNU General Public License for more details.
26#
27# You should have received a copy of the GNU General Public License
28# along with this program; if not, write to the Free Software
29# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
30#
31###########################################################################
32
33package GoogleVisionAPIConverter;
34
35use strict;
36no strict 'refs'; # allow filehandles to be variables and viceversa
37no strict 'subs';
38
39use gsprintf;
40use FileUtils;
41
42##use ImagePlugin;
43use BaseMediaConverter;
44
45use utf8;
46use JSON; # qw( from_json, encode_json );
47
# Establish the inheritance chain at compile time: this converter is a
# BaseMediaConverter.
BEGIN {
    @GoogleVisionAPIConverter::ISA = qw(BaseMediaConverter);
}
51
# Plugin arguments contributed by this converter.  Each entry is a hash
# with 'name', 'desc' (a lookup key wrapped in braces), 'type' and 'reqd',
# plus 'deft' where a default value applies.
my $arguments = [
    {
        'name' => 'google_application_credentials',
        # NOTE(review): the key below reads "applicatio" (missing 'n').  It is
        # kept as-is because it must match whatever key the description
        # resource file uses -- confirm against that file before renaming.
        'desc' => '{GoogleVisionAPIConverter.google_applicatio_credentials}',
        'type' => 'string',
        'reqd' => 'no',
        'deft' => 'google-sa-credentials-key.json',
    },
    {
        'name' => 'enable_image_labelling',
        'desc' => '{GoogleVisionAPIConverter.enable_image_labelling}',
        'type' => 'flag',
        'reqd' => 'no',
    },
    {
        'name' => 'enable_image_ocr',
        'desc' => '{GoogleVisionAPIConverter.enable_image_ocr}',
        'type' => 'flag',
        'reqd' => 'no',
    },
    {
        'name' => 'enable_document_ocr',
        'desc' => '{GoogleVisionAPIConverter.enable_document_ocr}',
        'type' => 'flag',
        'reqd' => 'no',
    },
];
72
# Option record describing this converter; 'args' points at the argument
# list declared above.
my $options = {
    'name'     => 'GoogleVisionAPIConverter',
    'desc'     => '{GoogleVisionAPIConverter.desc}',
    'abstract' => 'no',
    'inherits' => 'yes',
    'args'     => $arguments,
};
78
# Constructor.
#
# Parameters:
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - passed through unchanged to the parent constructor
#   $hashArgOptLists - hash ref; this converter's arguments are appended to
#                      its "ArgList" entry and its option record to "OptList"
#
# Returns the object built by BaseMediaConverter, re-blessed into $class.
sub new {
    my ($class) = shift(@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    # Arrow-form method call rather than the indirect-object form
    # ("new BaseMediaConverter(...)"), which Perl can mis-parse -- all the
    # more so inside a sub that is itself called "new".
    # NOTE(review): the meaning of the trailing 1 is defined by
    # BaseMediaConverter::new and is not visible here -- confirm.
    my $self = BaseMediaConverter->new($pluginlist, $inputargs, $hashArgOptLists, 1);

    return bless $self, $class;
}
91
# Validate the OCR flag combination, then hand over to the parent's begin().
#
# Image OCR and document OCR are mutually exclusive: when both flags are
# set, a usage message is written to STDERR and the process exits with
# status 2.  Otherwise all arguments after $self are forwarded unchanged to
# SUPER::begin.
sub begin {
    my ($self, @begin_args) = @_;

    my $both_ocr_modes_requested
        = $self->{'enable_image_ocr'} && $self->{'enable_document_ocr'};

    if (!$both_ocr_modes_requested) {
        return $self->SUPER::begin(@begin_args);
    }

    print STDERR "Please use the following command syntax for vision types: (--enable_image_ocr | --enable_document_ocr) [--enable_image_labelling]\n";
    print STDERR "\t\t --enable_image_ocr : optical character recognition for text within images\n";
    print STDERR "\t\t --enable_document_ocr : optical character recognition for text within documents\n";
    print STDERR "\t\t --enable_image_labelling : annotation labeling for objects within images\n";
    exit(2);
}
106
# Inspect one line of output from the vision command.
#
# Returns the two-element list ($had_error, $generate_dot).  $had_error is
# always 0.  $generate_dot is 1 when the line matches /^.*$/ -- i.e. it
# contains no newline other than an optional trailing one -- and 0 otherwise.
sub vision_monitor_line {
    my ($line) = @_;

    my $generate_dot = ($line =~ m/^.*$/) ? 1 : 0;

    return (0, $generate_dot);
}
120
# Run every enabled Google Vision analysis over one image file.
#
# Parameters:
#   $filename    - path of the image handed to vision.py
#   $file        - passed through to run_vision, which uses it in its
#                  progress message
#   $doc_obj     - document object; supplies the top section when
#                  $opt_section is undefined and is passed on to run_vision
#   $opt_section - optional section identifier
#
# For each of the enable_image_labelling / enable_image_ocr /
# enable_document_ocr flags that is set on $self, a vision.py command line
# is built and passed to run_vision.  For document OCR the output filename
# and section are also recorded in $self->{'gv-dococr-json-filename-recs'}.
#
# Returns the string "json" in every case, including when no flag is set.
sub run_gv_convert {
    my $self = shift(@_);
    my ($filename, $file, $doc_obj, $opt_section) = @_;

    my $section = (defined $opt_section) ? $opt_section : $doc_obj->get_top_section();

    my $outhandle = $self->{'outhandle'};
    print $outhandle "----- GoogleVisionAPIConverter run_gv_convert -----\n";

    # Target OCR / labelling type(s), in a fixed processing order.
    my @vision_types = grep { $self->{$_} }
        ("enable_image_labelling", "enable_image_ocr", "enable_document_ocr");

    if (scalar(@vision_types) != 0) {

        $self->init_cache_for_file($filename);
        # presumably populated by init_cache_for_file -- verify in BaseMediaConverter
        my $cached_image_dir = $self->{'cached_dir'};

        my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
        my $credentials_filename = &FileUtils::filenameConcatenate($collect_dir, "etc", $self->{'google_application_credentials'});

        for my $vision_type (@vision_types) {

            my $ofile = "${vision_type}-google-vision-output.json";
            my $ofilename = &FileUtils::filenameConcatenate($cached_image_dir, $ofile);

            # NOTE(review): the paths are only wrapped in double quotes; a
            # filename containing a double quote, backtick or '$' would not
            # survive if this string is interpreted by a shell -- confirm how
            # run_cached_general_cmd executes it.
            my $vision_cmd = "vision.py --$vision_type --credentials \"$credentials_filename\" \"$filename\" \"$ofilename\"";

            $self->run_vision($file, $filename, $ofile, $ofilename, $vision_cmd, $vision_type, $doc_obj, $section);

            if ($vision_type eq "enable_document_ocr") {
                my $gv_dococr_rec = { 'filename' => $ofilename, 'section' => $section };

                push(@{$self->{'gv-dococr-json-filename-recs'}}, $gv_dococr_rec);
            }
        }
    }

    return "json";
}
169
# Compute the axis-aligned rectangle enclosing a Google Vision block's
# bounding polygon.
#
# Parameter:
#   $gv_block - hash ref expected to hold {'boundingBox'}{'vertices'}, an
#               array of hashes with optional 'x' and 'y' entries
#
# Returns a hash ref with keys x_org, y_org (minimum x / y) and x_dim, y_dim
# (max - min + 1), or undef when the block has no vertices.
sub gv_ocr_bounding_box_rect
{
    my $self = shift(@_);
    my ($gv_block) = @_;

    my $bbox_rect = undef;

    my $gv_boundingBox = (ref($gv_block) eq 'HASH') ? $gv_block->{'boundingBox'} : undef;
    my $gv_vertices    = (ref($gv_boundingBox) eq 'HASH') ? $gv_boundingBox->{'vertices'} : undef;
    my $gv_num_vertices = (ref($gv_vertices) eq 'ARRAY') ? scalar(@$gv_vertices) : 0;

    if ($gv_num_vertices > 0) {
        # The Vision JSON sometimes has no 'x' or 'y' entry in a vertex.  This
        # is how a coordinate of 0 is serialised (zero-valued fields are left
        # out), so a missing entry is read as 0.  Skipping it instead would
        # give a box touching the left/top edge the wrong origin and a
        # width/height of 1.
        my $min_x = undef;
        my $min_y = undef;
        my $max_x = undef;
        my $max_y = undef;

        foreach my $gv_vertex (@$gv_vertices) {
            my $x = (defined $gv_vertex->{'x'}) ? $gv_vertex->{'x'} : 0;
            my $y = (defined $gv_vertex->{'y'}) ? $gv_vertex->{'y'} : 0;

            $min_x = $x if (!defined $min_x || ($x < $min_x));
            $max_x = $x if (!defined $max_x || ($x > $max_x));

            $min_y = $y if (!defined $min_y || ($y < $min_y));
            $max_y = $y if (!defined $max_y || ($y > $max_y));
        }

        $bbox_rect = {
            "x_org" => $min_x,
            "y_org" => $min_y,
            "x_dim" => $max_x - $min_x + 1,
            "y_dim" => $max_y - $min_y + 1
        };
    }

    return $bbox_rect;
}
220
# Run one vision.py command (through the caching command runner), attach the
# resulting JSON file to the document and turn its contents into document
# text and metadata.
#
# Parameters:
#   $file        - name used in the progress message
#   $filename    - input image path, passed to run_cached_general_cmd
#   $ofile       - output JSON leaf name ("enable_<type>-google-vision-output.json")
#   $ofilename   - full path of the output JSON file
#   $vision_cmd  - command line to run
#   $vision_type - "enable_image_ocr", "enable_document_ocr" or
#                  "enable_image_labelling"
#   $doc_obj     - document object receiving text / metadata / files
#   $section     - section the results are attached to
#
# Dies when the output JSON file cannot be opened.
sub run_vision
{
    my $self = shift(@_);
    my ($file, $filename, $ofile, $ofilename, $vision_cmd, $vision_type, $doc_obj, $section) = @_;

    my $print_info = {
        'message_prefix' => "GoogleVisionAPI",
        'message' => "Sending $file to GoogleVisionAPI using vision.py"
    };

    # NOTE(review): none of the three returned values is examined; a failed
    # command only surfaces below if the output file cannot be opened.
    my ($vision_regenerated, $vision_result, $vision_error)
        = $self->run_cached_general_cmd($vision_cmd, $filename, $ofilename, $print_info);

    # Need to work a bit harder in setting up the associated JSON file
    #  => strip off 'enable_' in favour of 'gv_'
    #  => add in section number as part of the file name to avoid clashes
    my $section_file_suffix = $section;
    $section_file_suffix =~ s/\./_/g;

    my $assoc_ofile = $ofile;
    $assoc_ofile =~ s/^enable_/gv_/;
    $assoc_ofile =~ s/-google-vision//;
    $assoc_ofile =~ s/\.(.*?)$/$section_file_suffix.$1/;

    $doc_obj->associate_file($ofilename, $assoc_ofile, "application/json", $section);

    my $json_text = do { # read in json file
        open(my $json_fh, "<:encoding(UTF-8)", $ofilename)
            or die("Can't open \"$ofilename\": $!\n");
        local $/;
        <$json_fh>
    };

    my $decoded_json = JSON::from_json($json_text);

    if ($vision_type eq "enable_document_ocr" || $vision_type eq "enable_image_ocr") {
        # Full OCR content.  The entry is absent when no text was detected, in
        # which case there is nothing to append to the section.
        my $ocr_text = $decoded_json->{'textAnnotations'}[0]{'description'};
        $doc_obj->add_utf8_text($section, $ocr_text) if (defined $ocr_text);

        # Group the recognised words by their first detected language.
        my $blocks = $decoded_json->{'fullTextAnnotation'}{'pages'}[0]{'blocks'};
        my %text_and_language;

        foreach my $block (@{$blocks}) {
            foreach my $paragraph (@{$block->{'paragraphs'}}) {
                foreach my $word (@{$paragraph->{'words'}}) {
                    my $detected_language = $word->{'property'}{'detectedLanguages'}[0]{'languageCode'} || "no_lang";
                    my $word_text = "";
                    foreach my $letter (@{$word->{'symbols'}}) {
                        $word_text .= $letter->{'text'};
                    }
                    $text_and_language{$detected_language} .= $word_text . " ";
                }
            }
        }

        # Sorted so the metadata is added in the same order on every run.
        foreach my $language (sort keys %text_and_language) {
            $doc_obj->add_utf8_metadata($section, "z_" . $language, $text_and_language{$language});
        }

        my $assoc_json_metaname = "HasGoogleVision";

        if ($vision_type eq "enable_document_ocr") {
            $assoc_json_metaname .= "DocumentOCRJSON";

            $doc_obj->add_utf8_metadata($section, "GVDocumentOCRJSON", $assoc_ofile);
        }
        else {
            # $vision_type eq "enable_image_ocr"
            $assoc_json_metaname .= "ImageOCRJSON";

            $doc_obj->add_utf8_metadata($section, "GVImageOCRJSON", $assoc_ofile);
        }

        $doc_obj->add_utf8_metadata($section, $assoc_json_metaname, 1);
    }
    elsif ($vision_type eq "enable_image_labelling") {
        my $labels = $decoded_json->{'labelAnnotations'};

        foreach my $label (@{$labels}) {
            my $description = $label->{'description'};

            # write to metadata : 'description'='Book' 'score'='0.9' 'topicality'='0.9' 'mid'='/m/0123'
            $doc_obj->add_utf8_metadata($section, "description", $description);
            $doc_obj->add_utf8_metadata($section, "score", $label->{'score'});
            $doc_obj->add_utf8_metadata($section, "topicality", $label->{'topicality'});
            $doc_obj->add_utf8_metadata($section, "mid", $label->{'mid'});

            # write to metadata : 'descriptions'='Book' 'Book_score'='0.9' 'Book_topicality'='0.9' 'Book_mid'='/m/0123'
            $doc_obj->add_utf8_metadata($section, "descriptions", $description);
            $doc_obj->add_utf8_metadata($section, $description . "_score", $label->{'score'});
            $doc_obj->add_utf8_metadata($section, $description . "_topicality", $label->{'topicality'});
            $doc_obj->add_utf8_metadata($section, $description . "_mid", $label->{'mid'});
        }

        $doc_obj->add_utf8_metadata($section, "HasGoogleVisionImageLabellingJSON", 1);
        $doc_obj->add_utf8_metadata($section, "GVImageLabellingJSON", $assoc_ofile);
    }
}
325
# Begin a fresh Open Annotation list for a document.
#
# Stores a new sc:AnnotationList structure (with an empty 'resources' array)
# in $self->{'openannotation-list'}; its '@id' is derived from the
# document's OID.  $section is accepted but not used.
sub start_openannotation_list
{
    my ($self, $doc_obj, $section) = @_;

    my $doc_id = $doc_obj->get_OID();

    # #### **** make '@id' a full URL to be unique? or greenstone3:site:collect:OID ??
    # (e.g. "https://iiif.harvardartmuseums.org/manifests/object/299843/list/47174896")
    $self->{'openannotation-list'} = {
        '@context'  => 'http://www.shared-canvas.org/ns/context.json',
        '@id'       => $doc_id . '/openannotation-list.json',
        '@type'     => 'sc:AnnotationList',
        'resources' => [],
    };
}
343
344
# Convert Google Vision OCR blocks into Open Annotation resources and append
# them to the 'resources' array of $self->{'openannotation-list'}.
#
# Parameters:
#   $gv_blocks - array ref of Vision 'blocks' (each with 'boundingBox' and
#                'paragraphs' -> 'words' -> 'symbols')
#   $doc_obj   - document object; its OID forms the base of every identifier
#   $section   - section identifier; "" (or undef) means the document is a
#                single image, in which case canvas 1 is used
#
# One oa:Annotation is produced per block: the block's bounding rectangle
# becomes an "xywh=" fragment selector and its text becomes an HTML body
# with one <p> element per paragraph.  Blocks without a bounding rectangle
# are skipped, since they cannot be anchored on the canvas.
#
# Example Open Annotation resource (for a single annotation):
#   {
#     "@context": "http://iiif.io/api/presentation/2/context.json",
#     "@id": "https://iiif.harvardartmuseums.org/annotations/9641482",
#     "@type": "oa:Annotation",
#     "motivation": [ "oa:commenting" ],
#     "on": {
#       "@type": "oa:SpecificResource",
#       "full": "https://iiif.harvardartmuseums.org/manifests/object/299843/canvas/canvas-47174896",
#       "selector": { "@type": "oa:FragmentSelector", "value": "xywh=622,591,642,940" },
#       "within": { "@id": "https://iiif.harvardartmuseums.org/manifests/object/299843",
#                   "@type": "sc:Manifest" }
#     },
#     "resource": [
#       { "@type": "dctypes:Text",
#         "chars": "<p>age: 35-52<br/>gender: Female(66.337677%)<br/>...</p><p>Generated by AWS Rekognition</p>",
#         "format": "text/html" }
#     ]
#   }
sub convert_gvocr_to_openannotation_resource
{
    my $self = shift(@_);
    my ($gv_blocks, $doc_obj, $section) = @_;

    $section = "" if (!defined $section);

    my $OID = $doc_obj->get_OID();
    my $OID_with_section = ($section ne "") ? "${OID}_$section" : $OID;
    $section = 1 if ($section eq ""); # occurs when the document is a single image

    my $self_openannotation_resources = $self->{'openannotation-list'}->{'resources'};

    my $block_i = 0;

    foreach my $block (@{$gv_blocks}) {
        # Counted before any skip so a block keeps the same number whether or
        # not its neighbours could be converted.
        $block_i++;

        my $bbox_rect = $self->gv_ocr_bounding_box_rect($block);
        next if (!defined $bbox_rect); # would otherwise yield "xywh=,,,"

        my $bb_x_org = $bbox_rect->{'x_org'};
        my $bb_y_org = $bbox_rect->{'y_org'};
        my $bb_x_dim = $bbox_rect->{'x_dim'};
        my $bb_y_dim = $bbox_rect->{'y_dim'};

        # #### **** make the identifiers below full URLs to be unique?
        # or greenstone3:site:collect:OID ??
        my $openannotation_resource = {
            "\@context" => "http://iiif.io/api/presentation/2/context.json",
            "\@id" => "${OID_with_section}/annotation/gv-block-$block_i",
            "\@type" => "oa:Annotation",
            "motivation" => [ "oa:commenting" ]
        };

        my $openannotation_on = {
            "\@type" => "oa:SpecificResource",
            "full" => "${OID}/canvas/$section", # doc id + /canvas + page-i/sect
            "selector" => {
                "\@type" => "oa:FragmentSelector",
                "value" => "xywh=${bb_x_org},${bb_y_org},${bb_x_dim},${bb_y_dim}"
            },
            "within" => {
                "\@id" => "${OID_with_section}/manifest",
                "\@type" => "sc:Manifest"
            }
        };
        $openannotation_resource->{'on'} = $openannotation_on;

        my $block_text_html = "";

        foreach my $paragraph (@{$block->{'paragraphs'}}) {
            my $para_text = "";

            foreach my $word (@{$paragraph->{'words'}}) {
                my $word_text = "";

                foreach my $letter (@{$word->{'symbols'}}) {
                    $word_text .= $letter->{'text'};
                }

                # The body is declared as text/html, so recognised characters
                # that are HTML-significant must be escaped ('&' first).
                $word_text =~ s/&/&amp;/g;
                $word_text =~ s/</&lt;/g;
                $word_text =~ s/>/&gt;/g;

                $para_text .= " " if $para_text ne "";
                $para_text .= $word_text;
            }

            $block_text_html .= "<p>\n$para_text\n</p>\n\n";
        }

        $openannotation_resource->{'resource'} = [{
            "\@type" => "dctypes:Text",
            "chars" => "$block_text_html",
            "format" => "text/html"
        }];

        push(@$self_openannotation_resources, $openannotation_resource);
    }
}
454
455
# Load a Google Vision document-OCR JSON file and append its blocks to the
# Open Annotation list currently being built.
#
# Parameters:
#   $gv_dococr_json_filename - path of the JSON file (read as UTF-8)
#   $doc_obj, $section       - passed straight through to
#                              convert_gvocr_to_openannotation_resource
#
# Only the blocks of the first page of 'fullTextAnnotation' are converted.
# Dies when the file cannot be opened.
sub convert_and_append_openannotation_resources
{
    my ($self, $gv_dococr_json_filename, $doc_obj, $section) = @_;

    # Slurp the whole JSON file
    open(my $json_fh, "<:encoding(UTF-8)", $gv_dococr_json_filename)
        or die("Can't open \"$gv_dococr_json_filename\": $!\n");
    my $json_text = do { local $/; <$json_fh> };
    close($json_fh);

    my $decoded_json = JSON::from_json($json_text);

    my $first_page = $decoded_json->{'fullTextAnnotation'}->{'pages'}->[0];
    my $gv_blocks  = $first_page->{'blocks'};

    $self->convert_gvocr_to_openannotation_resource($gv_blocks, $doc_obj, $section);
}
476
477
478
# Write the Open Annotation list built so far to a JSON file and clear it.
#
# Parameters:
#   $doc_obj        - accepted but not used
#   $json_ofilename - path of the JSON file to (over)write
#
# Returns 1 when the file was written and closed successfully, 0 otherwise
# (an error message is printed to STDERR).  $self->{'openannotation-list'}
# is reset to undef in both cases.
sub end_openannotation_list
{
    my $self = shift(@_);
    my ($doc_obj, $json_ofilename) = @_;

    my $ret_status = 1;

    my $json_fh;
    if (!open($json_fh, ">", $json_ofilename)) {
        print STDERR "Error: Failed save Open Annotation List JSON to \"$json_ofilename\":\n $!\n";
        $ret_status = 0;
    }
    else {
        # JSON::encode_json already returns UTF-8 encoded octets, so the handle
        # must stay in raw mode: a ":utf8" layer would encode those octets a
        # second time and corrupt every non-ASCII character.
        binmode($json_fh);

        my $openannotation_list = $self->{'openannotation-list'};
        my $openannotation_list_json_text = JSON::encode_json($openannotation_list);

        my $printed_ok = print {$json_fh} $openannotation_list_json_text;

        # Buffered write errors only surface at close, so check both.
        my $closed_ok = close($json_fh);

        if (!$printed_ok || !$closed_ok) {
            print STDERR "Error: Failed to write Open Annotation List JSON to \"$json_ofilename\":\n $!\n";
            $ret_status = 0;
        }
    }

    $self->{'openannotation-list'} = undef;

    return $ret_status;
}
505
# For every recorded document-OCR JSON file, make sure an up-to-date Open
# Annotation list exists in the cache and associate it with the document.
#
# Parameters:
#   $doc_obj                      - document object the lists are attached to
#                                   (always at its top section)
#   $gv_dococr_json_filename_recs - array ref of { 'filename', 'section' }
#                                   records
#
# A list is (re)generated when the cached file is missing or is not newer
# than the Vision JSON it is derived from; otherwise the cached copy is
# reused.
#
# Returns 1 when every list was available, 0 when saving any of them failed.
sub openannotation_list_associate_json
{
    my $self = shift(@_);
    my ($doc_obj, $gv_dococr_json_filename_recs) = @_;

    my $outhandle = $self->{'outhandle'};

    my $all_saved_ok = 1;

    for my $gv_json_filename_rec (@$gv_dococr_json_filename_recs) {
        my $gv_json_filename = $gv_json_filename_rec->{'filename'};
        my $section = $gv_json_filename_rec->{'section'};

        # NOTE(review): the cache location is always derived from the FIRST
        # record, not the current one.  This looks deliberate (all lists of a
        # document share one directory; the section is part of the file name)
        # -- confirm.
        my ($gv_dococr_filename_root) = ($gv_dococr_json_filename_recs->[0]->{'filename'} =~ m/^(.+)\.json$/);

        # sleight of hand so new directory spot in cache_dir picked out is where we want it!
        $gv_dococr_filename_root .= "/";

        $self->init_cache_for_file($gv_dococr_filename_root);
        # presumably populated by init_cache_for_file -- verify in BaseMediaConverter
        my $cached_dir = $self->{'cached_dir'};

        my $assoc_openannotation_json_ofile = "openannotation-list${section}.json";
        my $cached_openannotation_json_ofilename = &FileUtils::filenameConcatenate($cached_dir, $assoc_openannotation_json_ofile);

        my $needs_json_regen = 0;

        if (!-f $cached_openannotation_json_ofilename) {
            $needs_json_regen = 1;
        }
        elsif (-M $gv_json_filename <= -M $cached_openannotation_json_ofilename) {
            # -M is the file's AGE in days, so a smaller value means a more
            # recent modification: regenerate when the Vision JSON is as new
            # as, or newer than, the cached list.  (A tie regenerates, which
            # is the safe choice.)
            $needs_json_regen = 1;
        }

        my $saved_ok = 1;

        if ($needs_json_regen) {

            print $outhandle " OpenAnnotation-List: Generating $cached_openannotation_json_ofilename\n";

            $self->start_openannotation_list($doc_obj);
            $self->convert_and_append_openannotation_resources($gv_json_filename, $doc_obj, $section);

            $saved_ok = $self->end_openannotation_list($doc_obj, $cached_openannotation_json_ofilename);
        }
        else {
            print $outhandle " OpenAnnotation-List: Cached file $cached_openannotation_json_ofilename already exists\n";
        }

        if ($saved_ok) {
            my $top_section = $doc_obj->get_top_section();
            $doc_obj->associate_file($cached_openannotation_json_ofilename, $assoc_openannotation_json_ofile, "application/json", $top_section);
        }
        else {
            $all_saved_ok = 0;
        }
    }

    return $all_saved_ok;
}
571
572
# Generate and associate Open Annotation lists, but only when document OCR
# produced at least one JSON file for them.
#
# Parameter:
#   $doc_obj - document object, passed on to
#              openannotation_list_associate_json
#
# Returns 1 when there was nothing to do, otherwise the status returned by
# openannotation_list_associate_json.
sub opt_run_gen_openannotation
{
    my $self = shift(@_);
    my ($doc_obj) = @_;

    # The record list only exists once run_gv_convert has handled a document
    # OCR request, so treat a missing list as empty rather than dereferencing
    # undef (which only works while strict 'refs' is switched off).
    my $gv_dococr_json_filename_recs = $self->{'gv-dococr-json-filename-recs'};
    my $num_gv_dococr_json_filename_recs
        = (ref($gv_dococr_json_filename_recs) eq 'ARRAY') ? scalar(@$gv_dococr_json_filename_recs) : 0;

    my $ret_val_ok = 1;

    if ($num_gv_dococr_json_filename_recs > 0) {
        $ret_val_ok = $self->openannotation_list_associate_json($doc_obj, $gv_dococr_json_filename_recs);
    }

    return $ret_val_ok;
}
589
5901;
591
Note: See TracBrowser for help on using the repository browser.