Context Navigation

← Previous Changeset
Next Changeset →

Changeset 36988

Timestamp:

2022-12-07T22:45:39+13:00 (17 months ago)

Author:

davidb

Message:

Refactor the GoogleVisionAPI code into a separate Perl module so it can be used by other plugins, such as the to-be-written GoogleVisionPagedImagePlugin

Location:

gs3-extensions/structured-image/trunk/perllib/plugins

Files:

: 1 added
: 1 edited

GoogleVisionAPIConverter.pm (added)
GoogleVisionImagePlugin.pm (modified) (4 diffs)

Legend:

: Unmodified
: Added
: Removed

gs3-extensions/structured-image/trunk/perllib/plugins/GoogleVisionImagePlugin.pm

-              r36247
+              r36988
 use strict;
+no strict 'refs'; # allow filehandles to be variables and viceversa
+no strict 'subs';
+no  strict 'refs'; # allow filehandles to be variables and viceversa
+no  strict 'subs';
+use utf8;
+use JSON qw( from_json );
+#use Data::Dumper;
 use gsprintf;
 …
 use ImagePlugin;
 use BaseMediaConverter;
+use GoogleVisionAPIConverter;
-use utf8;
-use JSON qw( from_json );
 sub BEGIN {
     @GoogleVisionImagePlugin::ISA = ('ImagePlugin', 'BaseMediaConverter');
+    @GoogleVisionImagePlugin::ISA = ('ImagePlugin', 'GoogleVisionAPIConverter');
+}
+my $arguments =
+    [ { 'name' => "enable_image_labelling",
+    'desc' => "{GoogleVisionImagePlugin.enable_image_labelling}",
+    'type' => "flag",
+    'reqd' => "no" },
+    { 'name' => "enable_image_ocr",
+    'desc' => "{GoogleVisionImagePlugin.enable_image_ocr}",
+    'type' => "flag",
+    'reqd' => "no" },
+    { 'name' => "enable_document_ocr",
+    'desc' => "{GoogleVisionImagePlugin.enable_document_ocr}",
+    'type' => "flag",
+    'reqd' => "no" }
+];
+my $arguments = [];
 my $options = { 'name'     => "GoogleVisionImagePlugin",
 …
     push(@{$hashArgOptLists->{"OptList"}},$options);
     new BaseMediaConverter($pluginlist, $inputargs, $hashArgOptLists, 1);
+    new GoogleVisionAPIConverter($pluginlist, $inputargs, $hashArgOptLists, 1);
     my $self = new ImagePlugin($pluginlist, $inputargs, $hashArgOptLists);
 …
+}
-sub begin {
-    my $self = shift (@_);
-    my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;
-    if ($self->{'enable_image_ocr'} && $self->{'enable_document_ocr'}) {
-        print STDERR "Please use the following command syntax for vision types: (--enable_image_ocr | --enable_document_ocr) [--enable_image_labelling]\n";
-        print STDERR "\t\t --enable_image_ocr : optical character recognition for text within images\n";
-        print STDERR "\t\t --enable_document_ocr : optical character recognition for text within documents\n";
-        print STDERR "\t\t --enable_image_labelling : annotation labeling for objects within images\n";
-        exit(2);
+    }
-    $self->SUPER::begin(@_);
+}
-sub vision_monitor_line {
-    my ($line) = @_;
-    my $had_error = 0;
-    my $generate_dot = 0;
-    if ($line =~ m/^.*$/)
+    {
-        $generate_dot = 1;
+    }
-    return ($had_error,$generate_dot);
+}
-sub run_convert {
-    my $self = shift (@_);
-    my ($base_dir,$filename,$file,$doc_obj) = @_;
-    my $section = $doc_obj->get_top_section();
-    my $verbosity = $self->{'verbosity'};
-    my $outhandle = $self->{'outhandle'};
-    print $outhandle "----- GoogleVisionImagePlugin run_convert -----\n";
-    # print STDERR "*****", $self->{'enable_document_ocr'}, "\n";
-    my @vision_type = (); # array containing target ocr / labelling type(s)
-    if ($self->{'enable_image_labelling'}) { push(@vision_type, "enable_image_labelling"); }
-    if ($self->{'enable_image_ocr'}) { push(@vision_type, "enable_image_ocr"); }
-    if ($self->{'enable_document_ocr'}) { push(@vision_type, "enable_document_ocr"); }
-    my $vision_type_length = @vision_type; # assigning scalar var to array returns length
-    if ($vision_type_length != 0) {
-        $self->init_cache_for_file($filename);
-        my $cached_image_dir = $self->{'cached_dir'};
-        # my $audio_root = $self->{'cached_file_root'};
-        # my $filename_no_path = &File::Basename::basename($filename);
-        my $ofile = "google-vision-output.json";
-        my $ofilename = &FileUtils::filenameConcatenate($cached_image_dir,"google-vision-output.json");
-        my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
-        my $credentials_filename = &FileUtils::filenameConcatenate($collect_dir, "etc", "atea-storage-cd63a39dfeb5.json");
-        if ($vision_type_length == 1) {
-            my $vision_type_first = $vision_type[0];
-            my $vision_cmd = "vision.py --$vision_type_first --credentials \"$credentials_filename\" \"$filename\" \"$ofilename\"";
-            $self->run_vision($file, $filename, $ofile, $ofilename, $vision_cmd, $vision_type_first, $section, $doc_obj);
-        } elsif ($vision_type_length == 2) {
-            my $vision_type_first = $vision_type[0];
-            my $vision_type_second = $vision_type[1];
-            my $ofilename_first = &FileUtils::filenameConcatenate($cached_image_dir, $vision_type_first . "-google-vision-output.json");
-            my $ofilename_second = &FileUtils::filenameConcatenate($cached_image_dir, $vision_type_second . "-google-vision-output.json");
-            my $vision_cmd_1 = "vision.py --$vision_type_first --credentials \"$credentials_filename\" \"$filename\" \"$ofilename_first\"";
-            my $vision_cmd_2 = "vision.py --$vision_type_second --credentials \"$credentials_filename\" \"$filename\" \"$ofilename_second\"";
-            $self->run_vision($file, $filename, $ofile, $ofilename_first, $vision_cmd_1, $vision_type_first, $section, $doc_obj);
-            $self->run_vision($file, $filename, $ofile, $ofilename_second, $vision_cmd_2, $vision_type_second, $section, $doc_obj);
+        }
+    }
-    return "json";
+}
-sub run_vision {
-    use Data::Dumper;
-    my $self = shift (@_);
-    my ($file, $filename, $ofile, $ofilename, $vision_cmd, $vision_type, $section, $doc_obj) = @_;
-    my $vision_regenerated;
-    my $vision_result;
-    my $vision_error;
-    my $print_info = { 'message_prefix' => "GoogleVisionAPI",
-        'message' => "Sending $file to GoogleVisionAPI using vision.py" };
-    ($vision_regenerated,$vision_result,$vision_error)
-        = $self->run_cached_general_cmd($vision_cmd,$filename,$ofilename,$print_info);
-    $doc_obj->associate_file($ofilename,$vision_type . $ofile,"application/json",$section);
-    my $json_text = do { # read in json file
-        open(my $json_fh, "<:encoding(UTF-8)", $ofilename)
-            or die("Can't open \"$ofilename\": $!\n");
-        local $/;
-        <$json_fh>
-    };
-    my $cursection = $doc_obj->get_top_section(); # get top section for text append
-    my $decoded_json = from_json($json_text);
-    my $ocr_text;
-    if ($vision_type eq "enable_document_ocr" || $vision_type eq "enable_image_ocr") {
-        $ocr_text = $decoded_json->{textAnnotations}[0]{description}; # access full ocr content
-        $doc_obj->add_utf8_text( $cursection, $ocr_text); # append text to section
-        my $blocks = $decoded_json->{fullTextAnnotation}{pages}[0]{blocks};
-        my %text_and_language;
-        foreach my $block (@{ $blocks }) {
-            foreach my $paragraph (@{ $block->{paragraphs} }) {
-                foreach my $word (@{ $paragraph->{words} }) {
-                    my $detected_language = $word->{property}{detectedLanguages}[0]{languageCode} || "no_lang";
-                    my $word_text = "";
-                    foreach my $letter (@{ $word->{symbols} }) {
-                        $word_text .= $letter->{text};
+                    }
-                    $text_and_language{$detected_language} .= $word_text . " ";
+                }
+            }
+        }
-        for (keys %text_and_language) {
-            $doc_obj->add_utf8_metadata ($cursection, "z_" . $_, $text_and_language{$_});
+        }
-    } elsif ($vision_type eq "enable_image_labelling") {
-        $ocr_text = $decoded_json->{labelAnnotations};
-        foreach my $label (@{ $ocr_text }) {
-            # write to metadata : 'description'='Book' 'score'='0.9' 'topicality'='0.9' 'mid'='/m/0123'
-            $doc_obj->add_utf8_metadata ($cursection, "description", $label->{description});
-            $doc_obj->add_utf8_metadata ($cursection, "score", $label->{score});
-            $doc_obj->add_utf8_metadata ($cursection, "topicality", $label->{topicality});
-            $doc_obj->add_utf8_metadata ($cursection, "mid", $label->{mid});
-            # write to metadata : 'descriptions'='Book' 'Book_score'='0.9' 'Book_topicality'='0.9' 'Book_mid'='/m/0123'
-            $doc_obj->add_utf8_metadata ($cursection, "descriptions", $label->{description});
-            $doc_obj->add_utf8_metadata ($cursection, $label->{description} . "_score", $label->{score});
-            $doc_obj->add_utf8_metadata ($cursection, $label->{description} . "_topicality", $label->{topicality});
-            $doc_obj->add_utf8_metadata ($cursection, $label->{description} . "_mid", $label->{mid});
+        }
+    }
+}
 # do plugin specific processing of doc_obj

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 36988

Legend:

gs3-extensions/structured-image/trunk/perllib/plugins/GoogleVisionImagePlugin.pm

Download in other formats: