Changeset 37046


Ignore:
Timestamp:
2022-12-23T10:19:59+13:00 (16 months ago)
Author:
davidb
Message:

Code extended to now generate Open Annotation (JSON format) of the OCR'd blocks of text; some refinement of the existing Google Vision perl code

Location:
gs3-extensions/structured-image/trunk/perllib/plugins
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • gs3-extensions/structured-image/trunk/perllib/plugins/GoogleVisionAPIConverter.pm

    r37010 r37046  
    4444
    4545use utf8;
    46 use JSON qw( from_json );
     46use JSON; # qw( from_json, encode_json );
    4747
    4848sub BEGIN {
     
    128128    my $outhandle = $self->{'outhandle'};
    129129    print $outhandle "----- GoogleVisionAPIConveter run_gv_convert -----\n";
    130     # print STDERR "*****", $self->{'enable_document_ocr'}, "\n";
    131 
    132     my @vision_type = (); # array containing target ocr / labelling type(s)
    133 
    134     if ($self->{'enable_image_labelling'}) { push(@vision_type, "enable_image_labelling"); }
    135     if ($self->{'enable_image_ocr'})       { push(@vision_type, "enable_image_ocr");       }
    136     if ($self->{'enable_document_ocr'})    { push(@vision_type, "enable_document_ocr");    }
    137 
    138     my $vision_type_length = @vision_type; # assigning scalar var to array returns length
    139 
    140     if ($vision_type_length != 0) {
     130
     131    my @vision_types = (); # array containing target ocr / labelling type(s)
     132
     133    if ($self->{'enable_image_labelling'}) { push(@vision_types, "enable_image_labelling"); }
     134    if ($self->{'enable_image_ocr'})       { push(@vision_types, "enable_image_ocr");       }
     135    if ($self->{'enable_document_ocr'})    { push(@vision_types, "enable_document_ocr");    }
     136
     137    my $vision_types_length = scalar(@vision_types);
     138
     139    if ($vision_types_length != 0) {
    141140       
    142141        $self->init_cache_for_file($filename);
    143142        my $cached_image_dir = $self->{'cached_dir'};
    144         # my $audio_root = $self->{'cached_file_root'};
     143    # my $cached_image_root = $self->{'cached_file_root'};
    145144
    146145        # my $filename_no_path = &File::Basename::basename($filename);
    147146
    148         my $ofile = "google-vision-output.json";
    149         my $ofilename = &FileUtils::filenameConcatenate($cached_image_dir,"google-vision-output.json");
    150147        my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
    151148        my $credentials_filename = &FileUtils::filenameConcatenate($collect_dir, "etc", $self->{'google_application_credentials'});
    152149
    153         if ($vision_type_length == 1) {
    154             my $vision_type_first = $vision_type[0];
    155             my $vision_cmd = "vision.py --$vision_type_first --credentials \"$credentials_filename\" \"$filename\" \"$ofilename\"";
    156 
    157             $self->run_vision($file, $filename, $ofile, $ofilename, $vision_cmd, $vision_type_first, $section, $doc_obj);
    158         }
    159     elsif ($vision_type_length == 2) {
    160             my $vision_type_first = $vision_type[0];
    161             my $vision_type_second = $vision_type[1];
    162 
    163             my $ofilename_first = &FileUtils::filenameConcatenate($cached_image_dir, $vision_type_first . "-google-vision-output.json");
    164             my $ofilename_second = &FileUtils::filenameConcatenate($cached_image_dir, $vision_type_second . "-google-vision-output.json");
    165 
    166             my $vision_cmd_1 = "vision.py --$vision_type_first --credentials \"$credentials_filename\" \"$filename\" \"$ofilename_first\"";
    167             my $vision_cmd_2 = "vision.py --$vision_type_second --credentials \"$credentials_filename\" \"$filename\" \"$ofilename_second\"";
    168 
    169             $self->run_vision($file, $filename, $ofile, $ofilename_first, $vision_cmd_1, $vision_type_first, $section, $doc_obj);
    170             $self->run_vision($file, $filename, $ofile, $ofilename_second, $vision_cmd_2, $vision_type_second, $section, $doc_obj);
    171         }
     150    for my $vision_type (@vision_types) {
     151
     152        my $ofile = "${vision_type}-google-vision-output.json";
     153        my $ofilename = &FileUtils::filenameConcatenate($cached_image_dir,$ofile);
     154
     155        my $vision_cmd = "vision.py --$vision_type --credentials \"$credentials_filename\" \"$filename\" \"$ofilename\"";
     156
     157            $self->run_vision($file, $filename, $ofile, $ofilename, $vision_cmd, $vision_type, $doc_obj,$section);
     158
     159        if ($vision_type eq "enable_document_ocr") {
     160        my $gv_dococr_rec = { 'filename' => $ofilename, 'section' => $section };
     161       
     162        push(@{$self->{'gv-dococr-json-filename-recs'}}, $gv_dococr_rec);
     163        }       
     164    }
    172165    }
    173166
     
    175168}
    176169
    177 sub run_vision {
    178     use Data::Dumper;
    179     my $self = shift (@_);
    180     my ($file, $filename, $ofile, $ofilename, $vision_cmd, $vision_type, $section, $doc_obj) = @_;
     170sub gv_ocr_bounding_box_rect
     171{
     172    my $self = shift (@_);
     173    my ($gv_block,) = @_;
     174
     175    my $bbox_rect = undef;
     176   
     177    my $gv_boundingBox = $gv_block->{'boundingBox'};
     178
     179    my $gv_vertices = $gv_boundingBox->{'vertices'};
     180    my $gv_num_vertices = scalar(@$gv_vertices);
     181
     182    if ($gv_num_vertices > 0) {
     183    # print STDERR "**** gs_vertices[0] = ", JSON::encode_json($gv_vertices->[0]), "\n";
     184
     185    # Discovered that sometimes the 'x' value in the 'vertices' structure is not defined
     186    # So can't rely on picking up $gv_vertices->[0 for 'x' and 'y'
     187    # start off with 'undef' and test for !defined in for-loop
     188
     189    my $min_x = undef;
     190    my $min_y = undef;
     191    my $max_x = undef;
     192    my $max_y = undef;
     193
     194   
     195    for (my $v=0; $v<$gv_num_vertices; $v++) {
     196        my $x = $gv_vertices->[$v]->{'x'};
     197        my $y = $gv_vertices->[$v]->{'y'};
     198
     199        if (defined $x) {
     200        $min_x = $x if (!defined $min_x || ($x < $min_x));
     201        $max_x = $x if (!defined $max_x || ($x > $max_x));
     202        }
     203
     204        if (defined $y) {
     205        $min_y = $y if (!defined $min_y || ($y < $min_y));
     206        $max_y = $y if (!defined $max_y || ($y > $max_y));
     207        }
     208    }
     209   
     210    my $x_org = $min_x;
     211    my $y_org = $min_y;
     212    my $x_dim = $max_x - $min_x +1;
     213    my $y_dim = $max_y - $min_y +1;
     214   
     215    $bbox_rect = { "x_org" => $x_org, "y_org" => $y_org, "x_dim" => $x_dim, "y_dim" => $y_dim};
     216    }
     217   
     218    return $bbox_rect;
     219}
     220
     221sub run_vision
     222{
     223    my $self = shift (@_);
     224    my ($file, $filename, $ofile, $ofilename, $vision_cmd, $vision_type, $doc_obj,$section) = @_;
    181225
    182226    my $vision_regenerated;
     
    184228    my $vision_error;
    185229
    186     my $print_info = { 'message_prefix' => "GoogleVisionAPI",
    187         'message' => "Sending $file to GoogleVisionAPI using vision.py" };
     230    my $print_info = {
     231    'message_prefix' => "GoogleVisionAPI",
     232    'message'        => "Sending $file to GoogleVisionAPI using vision.py"
     233    };
    188234
    189235    ($vision_regenerated,$vision_result,$vision_error)
     
    194240    # => add in section number as part of the file name to avoid clashes
    195241   
    196     my $gv_assoc_prefix = $vision_type;
    197     $gv_assoc_prefix =~ s/^enable_/gv_/;
    198 
    199242    my $section_file_suffix = $section;
    200243    $section_file_suffix =~ s/\./_/g;
    201244
    202     my $assoc_ofile = $gv_assoc_prefix.$ofile;
     245    my $assoc_ofile = $ofile;
     246    $assoc_ofile =~ s/^enable_/gv_/;
     247    $assoc_ofile =~ s/-google-vision//;
    203248    $assoc_ofile =~ s/\.(.*?)$/$section_file_suffix.$1/;
    204249   
     
    212257    };
    213258
    214     my $decoded_json = from_json($json_text);
     259    my $decoded_json = JSON::from_json($json_text);
     260   
    215261    my $ocr_text;
    216262    if ($vision_type eq "enable_document_ocr" || $vision_type eq "enable_image_ocr") {
    217         $ocr_text = $decoded_json->{textAnnotations}[0]{description}; # access full ocr content
    218         $doc_obj->add_utf8_text( $section, $ocr_text); # append text to section
    219 
    220         my $blocks = $decoded_json->{fullTextAnnotation}{pages}[0]{blocks};
     263        $ocr_text = $decoded_json->{'textAnnotations'}[0]{'description'}; # access full ocr content
     264        $doc_obj->add_utf8_text($section, $ocr_text); # append text to section
     265
     266        my $blocks = $decoded_json->{'fullTextAnnotation'}{'pages'}[0]{'blocks'};
    221267        my %text_and_language;
    222268
    223         foreach my $block (@{ $blocks }) {
    224             foreach my $paragraph (@{ $block->{paragraphs} }) {
    225                 foreach my $word (@{ $paragraph->{words} }) {
    226                     my $detected_language = $word->{property}{detectedLanguages}[0]{languageCode} || "no_lang";
     269        foreach my $block (@{$blocks}) {
     270            foreach my $paragraph (@{$block->{'paragraphs'}}) {
     271                foreach my $word (@{$paragraph->{'words'}}) {
     272                    my $detected_language = $word->{'property'}{'detectedLanguages'}[0]{'languageCode'} || "no_lang";
    227273                    my $word_text = "";
    228                     foreach my $letter (@{ $word->{symbols} }) {
    229                         $word_text .= $letter->{text};
     274                    foreach my $letter (@{$word->{'symbols'}}) {
     275                        $word_text .= $letter->{'text'};
    230276                    }
    231277                    $text_and_language{$detected_language} .= $word_text . " ";
     
    235281   
    236282        for (keys %text_and_language) {
    237             $doc_obj->add_utf8_metadata ($section, "z_" . $_, $text_and_language{$_});
     283            $doc_obj->add_utf8_metadata($section, "z_" . $_, $text_and_language{$_});
    238284        }
    239285
     
    244290        $assoc_json_metaname .= "DocumentOCRJSON";
    245291
    246         $doc_obj->add_utf8_metadata ($section, "GVDocumentOCRJSON",$assoc_ofile);
     292        $doc_obj->add_utf8_metadata($section, "GVDocumentOCRJSON",$assoc_ofile);
    247293    }
    248294    else {
    249         # $vision_type eq "enable_image_ocr")
     295        # $vision_type eq "enable_image_ocr"
    250296        $assoc_json_metaname .= "ImageOCRJSON";
    251297
    252         $doc_obj->add_utf8_metadata ($section, "GVImageOCRJSON",$assoc_ofile);
    253     }
    254 
    255     $doc_obj->add_utf8_metadata ($section, $assoc_json_metaname, 1);
     298        $doc_obj->add_utf8_metadata($section, "GVImageOCRJSON",$assoc_ofile);
     299    }
     300
     301    $doc_obj->add_utf8_metadata($section, $assoc_json_metaname, 1);
    256302    }
    257303    elsif ($vision_type eq "enable_image_labelling") {
    258         $ocr_text = $decoded_json->{labelAnnotations}; 
    259         foreach my $label (@{ $ocr_text }) {
     304        $ocr_text = $decoded_json->{'labelAnnotations'};   
     305        foreach my $label (@{$ocr_text}) {
    260306            # write to metadata : 'description'='Book' 'score'='0.9' 'topicality'='0.9' 'mid'='/m/0123'
    261             $doc_obj->add_utf8_metadata ($section, "description", $label->{description});
    262             $doc_obj->add_utf8_metadata ($section, "score", $label->{score});
    263             $doc_obj->add_utf8_metadata ($section, "topicality", $label->{topicality});
    264             $doc_obj->add_utf8_metadata ($section, "mid", $label->{mid});
     307            $doc_obj->add_utf8_metadata($section, "description", $label->{'description'});
     308            $doc_obj->add_utf8_metadata($section, "score",       $label->{'score'});
     309            $doc_obj->add_utf8_metadata($section, "topicality",  $label->{'topicality'});
     310            $doc_obj->add_utf8_metadata($section, "mid",         $label->{'mid'});
    265311
    266312        # write to metadata : 'descriptions'='Book' 'Book_score'='0.9' 'Book_topicality'='0.9' 'Book_mid'='/m/0123'
    267             $doc_obj->add_utf8_metadata ($section, "descriptions", $label->{description});
    268             $doc_obj->add_utf8_metadata ($section, $label->{description} . "_score", $label->{score});
    269             $doc_obj->add_utf8_metadata ($section, $label->{description} . "_topicality", $label->{topicality});
    270             $doc_obj->add_utf8_metadata ($section, $label->{description} . "_mid", $label->{mid});
     313            $doc_obj->add_utf8_metadata($section, "descriptions", $label->{'description'});
     314            $doc_obj->add_utf8_metadata($section, $label->{'description'} . "_score", $label->{'score'});
     315            $doc_obj->add_utf8_metadata($section, $label->{'description'} . "_topicality", $label->{'topicality'});
     316            $doc_obj->add_utf8_metadata($section, $label->{'description'} . "_mid", $label->{'mid'});
    271317
    272318        }
    273319   
    274     $doc_obj->add_utf8_metadata ($section, "HasGoogleVisionImageLabellingJSON", 1);
    275     $doc_obj->add_utf8_metadata ($section, "GVImageLabellingJSON",$assoc_ofile);
    276 
    277     }
     320    $doc_obj->add_utf8_metadata($section, "HasGoogleVisionImageLabellingJSON", 1);
     321    $doc_obj->add_utf8_metadata($section, "GVImageLabellingJSON",$assoc_ofile);
     322
     323    }
     324}
     325
     326sub start_openannotation_list
     327{
     328    my $self = shift (@_);
     329    my ($doc_obj) = @_;
     330
     331    my $OID = $doc_obj->get_OID();
     332
     333    my $openannotation_list = {
     334        "\@context" => "http://www.shared-canvas.org/ns/context.json",
     335    # "\@id"      => "https://iiif.harvardartmuseums.org/manifests/object/299843/list/47174896",
     336    "\@id"      => "${OID}/openannotation-list.json",  # #### **** make full URL to be unique? or greenstone3:site:collect:OID ??
     337    "\@type"    => "sc:AnnotationList",
     338    "resources" => []
     339    };
     340
     341    $self->{'openannotation-list'} = $openannotation_list;
     342}
     343
     344
     345   
     346sub convert_gvocr_to_openannotation_resource
     347{
     348    my $self = shift (@_);
     349    my ($gv_blocks, $doc_obj, $section) = @_;
     350
     351    my $OID = $doc_obj->get_OID();
     352    my $OID_with_section = ($section ne "") ? "${OID}_$section" : $OID;
     353    $section = 1 if ($section eq ""); # occurs when the document is a single image
     354   
     355   
     356    # Example Open Annotation resource (for single annotation):
     357#      {
     358#             "@context": "http://iiif.io/api/presentation/2/context.json",
     359#             "@id": "https://iiif.harvardartmuseums.org/annotations/9641482",
     360#             "@type": "oa:Annotation",
     361#             "motivation": [
     362#                 "oa:commenting"
     363#             ],
     364#             "on": {
     365#                 "@type": "oa:SpecificResource",
     366#                 "full": "https://iiif.harvardartmuseums.org/manifests/object/299843/canvas/canvas-47174896",
     367#                 "selector": {
     368#                     "@type": "oa:FragmentSelector",
     369#                     "value": "xywh=622,591,642,940"
     370#                 },
     371#                 "within": {
     372#                     "@id": "https://iiif.harvardartmuseums.org/manifests/object/299843",
     373#                     "@type": "sc:Manifest"
     374#                 }
     375#             },
     376#             "resource": [
     377#                 {
     378#                     "@type": "dctypes:Text",
     379#                     "chars": "<p>age: 35-52<br/>gender: Female(66.337677%)<br/>CALM: 55.438412%<br/>CONFUSED: 3.949288%<br/>SURPRISED: 2.33092%<br/>DISGUSTED:
     380# 0.545727%<br/>HAPPY: 1.549943%<br/>ANGRY: 2.082294%<br/>SAD: 34.103416%<br/></p><p>Generated by AWS Rekognition</p>",
     381#                     "format": "text/html"
     382#                 }
     383#             ]
     384#         },
     385
     386    my $self_openannotation_resources = $self->{'openannotation-list'}->{'resources'};
     387
     388    my $block_i = 0;
     389   
     390    foreach my $block (@{$gv_blocks}) {
     391    $block_i++;
     392   
     393    my $openannotation_resource = {
     394        "\@context"  => "http://iiif.io/api/presentation/2/context.json",
     395        # "\@id"       => "https://iiif.harvardartmuseums.org/annotations/9641482",
     396        "\@id"       => "${OID_with_section}/annotation/gv-block-$block_i", # #### **** make full URL to be unique? or greenstone3:site:collect:OID ??
     397        "\@type"     => "oa:Annotation",
     398        "motivation" => [ "oa:commenting" ]
     399    };
     400   
     401
     402    my $bbox_rect = $self->gv_ocr_bounding_box_rect($block);
     403    my $bb_x_org = $bbox_rect->{'x_org'};
     404    my $bb_y_org = $bbox_rect->{'y_org'};
     405    my $bb_x_dim = $bbox_rect->{'x_dim'};
     406    my $bb_y_dim = $bbox_rect->{'y_dim'};
     407   
     408    my $openannotation_on = {
     409        "\@type" => "oa:SpecificResource",
     410        # "full"   => "https://iiif.harvardartmuseums.org/manifests/object/299843/canvas/canvas-47174896",
     411        "full"   => "${OID}/canvas/$section", # doc id + /canvas + page-i/sect # #### **** make full URL to be unique? or greenstone3:site:collect:OID ??
     412        "selector" => {
     413        "\@type" => "oa:FragmentSelector",
     414        "value"  => "xywh=${bb_x_org},${bb_y_org},${bb_x_dim},${bb_y_dim}"
     415        },
     416        "within" => {
     417        #"\@id"   => "https://iiif.harvardartmuseums.org/manifests/object/299843",
     418        "\@id"   => "${OID_with_section}/manifest", # #### **** make full URL to be unique? or greenstone3:site:collect:OID... ??
     419        "\@type" => "sc:Manifest"
     420        }
     421    };
     422    $openannotation_resource->{'on'} = $openannotation_on;
     423       
     424
     425    my $block_text_html = "";
     426   
     427    foreach my $paragraph (@{$block->{'paragraphs'}}) {
     428        my $para_text = "";
     429       
     430        foreach my $word (@{$paragraph->{'words'}}) {
     431        my $word_text = "";
     432       
     433        foreach my $letter (@{$word->{'symbols'}}) {
     434            $word_text .= $letter->{'text'};
     435        }
     436
     437        $para_text .= " " if $para_text ne "";
     438        $para_text .= $word_text;
     439        }
     440
     441        $block_text_html .= "<p>\n$para_text\n</p>\n\n";
     442    }
     443       
     444    my $openannotation_inner_resource = [{
     445        "\@type"  => "dctypes:Text",
     446        "chars"   => "$block_text_html",
     447        "format"  => "text/html"
     448    }];
     449
     450    $openannotation_resource->{'resource'} = $openannotation_inner_resource;
     451
     452    push(@$self_openannotation_resources,$openannotation_resource);     
     453    }
     454}
     455
     456
     457sub convert_and_append_openannotation_resources
     458{
     459    my $self = shift (@_);
     460    my ($gv_dococr_json_filename, $doc_obj, $section) = @_;
     461
     462
     463    # Read in JSON file
     464    my $json_text = do {
     465    open(my $json_fh, "<:encoding(UTF-8)", $gv_dococr_json_filename)
     466        or die("Can't open \"$gv_dococr_json_filename\": $!\n");
     467    local $/;
     468    <$json_fh>
     469    };
     470   
     471    my $decoded_json = JSON::from_json($json_text);
     472   
     473    my $gv_blocks = $decoded_json->{'fullTextAnnotation'}->{'pages'}->[0]->{'blocks'};
     474
     475    $self->convert_gvocr_to_openannotation_resource($gv_blocks, $doc_obj, $section);
     476}
     477
     478
     479
     480sub end_openannotation_list
     481{
     482    my $self = shift (@_);
     483    my ($doc_obj,$json_ofilename) = @_;
     484
     485    my $ret_status = 1;
     486   
     487    if (!open(JOUT, "> $json_ofilename")) {
     488    print STDERR "Error: Failed save Open Annotation List JSON to \"$json_ofilename\":\n    $!\n";
     489        $ret_status = 0;
     490    }
     491    else {
     492    binmode(JOUT, ":utf8");
     493   
     494    my $openannotation_list = $self->{'openannotation-list'};
     495    my $openannotation_list_json_text = JSON::encode_json($openannotation_list);
     496   
     497    print JOUT $openannotation_list_json_text;
     498    close JOUT;
     499   
     500    }   
     501   
     502    $self->{'openannotation-list'} = undef;
     503   
     504    return $ret_status;
     505}
     506
     507sub openannotation_list_associate_json
     508{
     509    my $self = shift (@_);
     510    my ($doc_obj, $gv_dococr_json_filename_recs) = @_;
     511
     512    my $outhandle = $self->{'outhandle'};
     513   
     514    # Guaranteed to have at least one value in gv_dococr_json_filename_recs
     515    #
     516    # Legacy code used to have a '\d+' just before the '.json' reflecting page/section number
     517    # Keep this in the regular expression, just in case,
     518    #
     519    my ($gv_dococr_filename_root) = ($gv_dococr_json_filename_recs->[0]->{'filename'} =~ m/^(.+)(?:\d+)?\.json$/);
     520
     521    # slight of hand so new directory spot in cache_dir picked out is where we want it!
     522    $gv_dococr_filename_root .= "/";
     523   
     524    my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
     525    my $toplevel_cached_dir = &FileUtils::filenameConcatenate($collect_dir,"cached");
     526   
     527    $self->init_cache_for_file($gv_dococr_filename_root);
     528    my $cached_dir = $self->{'cached_dir'};
     529
     530    my $assoc_openannotation_json_ofile = "openannotation-list.json";
     531    my $cached_openannotation_json_ofilename = &FileUtils::filenameConcatenate($cached_dir,$assoc_openannotation_json_ofile);   
     532
     533    my $needs_json_regen = 0;
     534
     535    if (!-f $cached_openannotation_json_ofilename) {
     536    $needs_json_regen = 1;
     537    }
     538    else {
     539    for my $gv_json_filename_rec (@$gv_dococr_json_filename_recs) {
     540        my $gv_json_filename = $gv_json_filename_rec->{'filename'};
     541        if (-M $gv_json_filename > -M $cached_openannotation_json_ofilename) {
     542        $needs_json_regen = 1;
     543        last;
     544        }
     545    }
     546    }
     547
     548    my $saved_ok = 1;
     549   
     550    if ($needs_json_regen) {
     551
     552    print $outhandle "  OpenAnnotation-List: Generating $cached_openannotation_json_ofilename\n";
     553   
     554    $self->start_openannotation_list($doc_obj);
     555
     556    for my $gv_json_filename_rec (@$gv_dococr_json_filename_recs) {
     557        my $gv_json_filename = $gv_json_filename_rec->{'filename'};
     558        my $section = $gv_json_filename_rec->{'section'};
     559        $self->convert_and_append_openannotation_resources($gv_json_filename, $doc_obj,$section);
     560    }
     561   
     562    $saved_ok = $self->end_openannotation_list($doc_obj,$cached_openannotation_json_ofilename);
     563    }
     564   
     565    if ($saved_ok) {
     566    print $outhandle "  OpenAnnotation-List: Cached file $cached_openannotation_json_ofilename already exists\n";
     567   
     568    my $top_section = $doc_obj->get_top_section();
     569    $doc_obj->associate_file($cached_openannotation_json_ofilename,$assoc_openannotation_json_ofile,"application/json",$top_section);
     570    }
     571
     572    return $saved_ok;
     573}
     574
     575sub opt_run_gen_openannotation
     576{   
     577    my $self = shift (@_);
     578    my ($doc_obj) = @_;
     579
     580    my $gv_dococr_json_filename_recs = $self->{'gv-dococr-json-filename-recs'};
     581    my $num_gv_dococr_json_filename_recs = scalar(@$gv_dococr_json_filename_recs);
     582
     583    my $ret_val_ok = 1;
     584   
     585    if ($num_gv_dococr_json_filename_recs > 0) {
     586    $ret_val_ok = $self->openannotation_list_associate_json($doc_obj,$gv_dococr_json_filename_recs);   
     587    }
     588
     589    return $ret_val_ok;
    278590}
    279591
    2805921;
    281593
    282 
    283 
    284 
    285 
    286 
    287 
    288 
    289 
    290 
    291 
  • gs3-extensions/structured-image/trunk/perllib/plugins/GoogleVisionImagePlugin.pm

    r37000 r37046  
    8282    my $utf8_filename_no_path = $self->filepath_to_utf8($filename_no_path);
    8383    my $url_encoded_filename = &util::rename_file($utf8_filename_no_path, $self->{'file_rename_method'});
     84
     85    $self->{'gv-dococr-json-filename-recs'} = [];
     86
     87    # The following also builds up the 'openannotation-list' 'resources' array   
     88    $self->run_gv_convert($filename_full_path,$url_encoded_filename,$doc_obj);
     89
     90    $self->SUPER::process(@_);
     91}
     92
     93sub post_process_doc_obj {
     94    my $self = shift (@_); 
     95    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
     96
     97    my $ret_val_ok = $self->SUPER::post_process_doc_obj(@_);
    8498   
    85     $self->run_gv_convert($filename_full_path,$url_encoded_filename,$doc_obj);
     99    if ($ret_val_ok) {
     100    $ret_val_ok = $self->opt_run_gen_openannotation($doc_obj);
     101    }
    86102   
    87     $self->SUPER::process(@_);
     103    return $ret_val_ok;
    88104}
    89105
  • gs3-extensions/structured-image/trunk/perllib/plugins/GoogleVisionPagedImagePlugin.pm

    r37000 r37046  
    8686
    8787
     88sub init_new_doc_item
     89{   
     90    my $self = shift (@_);
     91    my ($filename_full_path, $processor, $metadata) = @_;
     92
     93    my $doc_obj = $self->SUPER::init_new_doc_item(@_);
     94
     95    $self->{'gv-dococr-json-filename-recs'} = [];
     96   
     97    return $doc_obj;   
     98}
     99
     100
    88101sub process_image {
    89102    my $self = shift(@_);
     
    94107   
    95108    $self->run_gv_convert($filename_full_path,$url_encoded_filename,$doc_obj,$section);
    96 
     109   
    97110    return $self->SUPER::process_image(@_);
    98111}
    99112
     113sub post_process_doc_obj {
     114    my $self = shift (@_); 
     115    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
     116
     117    my $ret_val_ok = $self->SUPER::post_process_doc_obj(@_);
     118
     119    if ($ret_val_ok) {
     120    $ret_val_ok = $self->opt_run_gen_openannotation($doc_obj);
     121    }
     122   
     123    return $ret_val_ok;
     124}
     125
     126
    1001271;
Note: See TracChangeset for help on using the changeset viewer.