Changeset 37010


Ignore:
Timestamp:
2022-12-10T18:19:13+13:00 (17 months ago)
Author:
davidb
Message:

Changes to make it easier to display OCR bounding-box info in PagedImage doc view

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gs3-extensions/structured-image/trunk/perllib/plugins/GoogleVisionAPIConverter.pm

    r37000 r37010  
    133133
    134134    if ($self->{'enable_image_labelling'}) { push(@vision_type, "enable_image_labelling"); }
    135     if ($self->{'enable_image_ocr'}) { push(@vision_type, "enable_image_ocr"); }
    136     if ($self->{'enable_document_ocr'}) { push(@vision_type, "enable_document_ocr"); }
     135    if ($self->{'enable_image_ocr'})       { push(@vision_type, "enable_image_ocr");      }
     136    if ($self->{'enable_document_ocr'})    { push(@vision_type, "enable_document_ocr");    }
    137137
    138138    my $vision_type_length = @vision_type; # assigning scalar var to array returns length
     
    156156
    157157            $self->run_vision($file, $filename, $ofile, $ofilename, $vision_cmd, $vision_type_first, $section, $doc_obj);
    158         } elsif ($vision_type_length == 2) {
     158        }
     159    elsif ($vision_type_length == 2) {
    159160            my $vision_type_first = $vision_type[0];
    160161            my $vision_type_second = $vision_type[1];
     
    189190        = $self->run_cached_general_cmd($vision_cmd,$filename,$ofilename,$print_info);
    190191
    191     $doc_obj->associate_file($ofilename,$vision_type . $ofile,"application/json",$section);
     192    # Need to work a bit harder in setting up the associated JSON file
     193    # => strip off 'enable_' in favour of 'gv_'
     194    # => add in section number as part of the file name to avoid clashes
     195   
     196    my $gv_assoc_prefix = $vision_type;
     197    $gv_assoc_prefix =~ s/^enable_/gv_/;
     198
     199    my $section_file_suffix = $section;
     200    $section_file_suffix =~ s/\./_/g;
     201
     202    my $assoc_ofile = $gv_assoc_prefix.$ofile;
     203    $assoc_ofile =~ s/\.(.*?)$/$section_file_suffix.$1/;
     204   
     205    $doc_obj->associate_file($ofilename,$assoc_ofile,"application/json",$section);
    192206
    193207    my $json_text = do { # read in json file
     
    198212    };
    199213
    200     #my $cursection = $doc_obj->get_top_section(); # get top section for text append
    201    
    202214    my $decoded_json = from_json($json_text);
    203215    my $ocr_text;
     
    221233            }
    222234        }
     235   
    223236        for (keys %text_and_language) {
    224237            $doc_obj->add_utf8_metadata ($section, "z_" . $_, $text_and_language{$_});
    225238        }
    226239
    227 
    228     } elsif ($vision_type eq "enable_image_labelling") {
    229         $ocr_text = $decoded_json->{labelAnnotations};
     240   
     241    my $assoc_json_metaname = "HasGoogleVision";
     242   
     243    if ($vision_type eq "enable_document_ocr") {
     244        $assoc_json_metaname .= "DocumentOCRJSON";
     245
     246        $doc_obj->add_utf8_metadata ($section, "GVDocumentOCRJSON",$assoc_ofile);
     247    }
     248    else {
     249        # ($vision_type eq "enable_image_ocr")
     250        $assoc_json_metaname .= "ImageOCRJSON";
     251
     252        $doc_obj->add_utf8_metadata ($section, "GVImageOCRJSON",$assoc_ofile);
     253    }
     254
     255    $doc_obj->add_utf8_metadata ($section, $assoc_json_metaname, 1);
     256    }
     257    elsif ($vision_type eq "enable_image_labelling") {
     258        $ocr_text = $decoded_json->{labelAnnotations}; 
    230259        foreach my $label (@{ $ocr_text }) {
    231260            # write to metadata : 'description'='Book' 'score'='0.9' 'topicality'='0.9' 'mid'='/m/0123'
     
    234263            $doc_obj->add_utf8_metadata ($section, "topicality", $label->{topicality});
    235264            $doc_obj->add_utf8_metadata ($section, "mid", $label->{mid});
    236             # write to metadata : 'descriptions'='Book' 'Book_score'='0.9' 'Book_topicality'='0.9' 'Book_mid'='/m/0123'
     265
     266        # write to metadata : 'descriptions'='Book' 'Book_score'='0.9' 'Book_topicality'='0.9' 'Book_mid'='/m/0123'
    237267            $doc_obj->add_utf8_metadata ($section, "descriptions", $label->{description});
    238268            $doc_obj->add_utf8_metadata ($section, $label->{description} . "_score", $label->{score});
     
    241271
    242272        }
     273   
     274    $doc_obj->add_utf8_metadata ($section, "HasGoogleVisionImageLabellingJSON", 1);
     275    $doc_obj->add_utf8_metadata ($section, "GVImageLabellingJSON",$assoc_ofile);
     276
    243277    }
    244278}
Note: See TracChangeset for help on using the changeset viewer.