Changeset 37010
- Timestamp: 2022-12-10T18:19:13+13:00 (12 months ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
gs3-extensions/structured-image/trunk/perllib/plugins/GoogleVisionAPIConverter.pm
r37000 r37010 133 133 134 134 if ($self->{'enable_image_labelling'}) { push(@vision_type, "enable_image_labelling"); } 135 if ($self->{'enable_image_ocr'}) { push(@vision_type, "enable_image_ocr");}136 if ($self->{'enable_document_ocr'}) { push(@vision_type, "enable_document_ocr");}135 if ($self->{'enable_image_ocr'}) { push(@vision_type, "enable_image_ocr"); } 136 if ($self->{'enable_document_ocr'}) { push(@vision_type, "enable_document_ocr"); } 137 137 138 138 my $vision_type_length = @vision_type; # assigning scalar var to array returns length … … 156 156 157 157 $self->run_vision($file, $filename, $ofile, $ofilename, $vision_cmd, $vision_type_first, $section, $doc_obj); 158 } elsif ($vision_type_length == 2) { 158 } 159 elsif ($vision_type_length == 2) { 159 160 my $vision_type_first = $vision_type[0]; 160 161 my $vision_type_second = $vision_type[1]; … … 189 190 = $self->run_cached_general_cmd($vision_cmd,$filename,$ofilename,$print_info); 190 191 191 $doc_obj->associate_file($ofilename,$vision_type . $ofile,"application/json",$section); 192 # Need to work a bit harder in setting up the associated JSON file 193 # => strip of 'enable_' in favour of 'gv_' 194 # => add in section number as part of the file name to avoid clashes 195 196 my $gv_assoc_prefix = $vision_type; 197 $gv_assoc_prefix =~ s/^enable_/gv_/; 198 199 my $section_file_suffix = $section; 200 $section_file_suffix =~ s/\./_/g; 201 202 my $assoc_ofile = $gv_assoc_prefix.$ofile; 203 $assoc_ofile =~ s/\.(.*?)$/$section_file_suffix.$1/; 204 205 $doc_obj->associate_file($ofilename,$assoc_ofile,"application/json",$section); 192 206 193 207 my $json_text = do { # read in json file … … 198 212 }; 199 213 200 #my $cursection = $doc_obj->get_top_section(); # get top section for text append201 202 214 my $decoded_json = from_json($json_text); 203 215 my $ocr_text; … … 221 233 } 222 234 } 235 223 236 for (keys %text_and_language) { 224 237 $doc_obj->add_utf8_metadata ($section, "z_" . 
$_, $text_and_language{$_}); 225 238 } 226 239 227 228 } elsif ($vision_type eq "enable_image_labelling") { 229 $ocr_text = $decoded_json->{labelAnnotations}; 240 241 my $assoc_json_metaname = "HasGoogleVision"; 242 243 if ($vision_type eq "enable_document_ocr") { 244 $assoc_json_metaname .= "DocumentOCRJSON"; 245 246 $doc_obj->add_utf8_metadata ($section, "GVDocumentOCRJSON",$assoc_ofile); 247 } 248 else { 249 # $vision_type eq "enable_image_ocr") 250 $assoc_json_metaname .= "ImageOCRJSON"; 251 252 $doc_obj->add_utf8_metadata ($section, "GVImageOCRJSON",$assoc_ofile); 253 } 254 255 $doc_obj->add_utf8_metadata ($section, $assoc_json_metaname, 1); 256 } 257 elsif ($vision_type eq "enable_image_labelling") { 258 $ocr_text = $decoded_json->{labelAnnotations}; 230 259 foreach my $label (@{ $ocr_text }) { 231 260 # write to metadata : 'description'='Book' 'score'='0.9' 'topicality'='0.9' 'mid'='/m/0123' … … 234 263 $doc_obj->add_utf8_metadata ($section, "topicality", $label->{topicality}); 235 264 $doc_obj->add_utf8_metadata ($section, "mid", $label->{mid}); 236 # write to metadata : 'descriptions'='Book' 'Book_score'='0.9' 'Book_topicality'='0.9' 'Book_mid'='/m/0123' 265 266 # write to metadata : 'descriptions'='Book' 'Book_score'='0.9' 'Book_topicality'='0.9' 'Book_mid'='/m/0123' 237 267 $doc_obj->add_utf8_metadata ($section, "descriptions", $label->{description}); 238 268 $doc_obj->add_utf8_metadata ($section, $label->{description} . "_score", $label->{score}); … … 241 271 242 272 } 273 274 $doc_obj->add_utf8_metadata ($section, "HasGoogleVisionImageLabellingJSON", 1); 275 $doc_obj->add_utf8_metadata ($section, "GVImageLabellingJSON",$assoc_ofile); 276 243 277 } 244 278 }
Note: See TracChangeset for help on using the changeset viewer.