Changeset 37117
- Timestamp:
- 2023-01-07T11:42:21+13:00 (11 months ago)
- Location:
- gs3-extensions/structured-image/trunk
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
gs3-extensions/structured-image/trunk/perllib/plugins/GoogleVisionAPIConverter.pm
r37065 r37117 261 261 my $ocr_text; 262 262 if ($vision_type eq "enable_document_ocr" || $vision_type eq "enable_image_ocr") { 263 $ocr_text = $decoded_json->{'textAnnotations'}[0]{'description'}; # access full ocr content 264 $doc_obj->add_utf8_text($section, $ocr_text); # append text to section 265 266 my $blocks = $decoded_json->{'fullTextAnnotation'}{'pages'}[0]{'blocks'}; 267 my %text_and_language; 268 269 foreach my $block (@{$blocks}) { 270 foreach my $paragraph (@{$block->{'paragraphs'}}) { 271 foreach my $word (@{$paragraph->{'words'}}) { 272 my $detected_language = $word->{'property'}{'detectedLanguages'}[0]{'languageCode'} || "no_lang"; 273 my $word_text = ""; 274 foreach my $letter (@{$word->{'symbols'}}) { 275 $word_text .= $letter->{'text'}; 276 } 277 $text_and_language{$detected_language} .= $word_text . " "; 278 } 279 } 280 } 281 282 for (keys %text_and_language) { 283 $doc_obj->add_utf8_metadata($section, "z_" . $_, $text_and_language{$_}); 284 } 285 263 264 if (defined $decoded_json->{'textAnnotations'}) { 265 $ocr_text = $decoded_json->{'textAnnotations'}->[0]->{'description'}; # access full ocr content 266 $doc_obj->add_utf8_text($section, $ocr_text); # append text to section 267 268 my $blocks = $decoded_json->{'fullTextAnnotation'}->{'pages'}->[0]->{'blocks'}; 269 my %text_and_language; 270 271 foreach my $block (@{$blocks}) { 272 foreach my $paragraph (@{$block->{'paragraphs'}}) { 273 foreach my $word (@{$paragraph->{'words'}}) { 274 my $detected_language = $word->{'property'}->{'detectedLanguages'}->[0]->{'languageCode'} || "no_lang"; 275 my $word_text = ""; 276 foreach my $letter (@{$word->{'symbols'}}) { 277 $word_text .= $letter->{'text'}; 278 } 279 $text_and_language{$detected_language} .= $word_text . " "; 280 } 281 } 282 } 283 284 for (keys %text_and_language) { 285 $doc_obj->add_utf8_metadata($section, "z_" . 
$_, $text_and_language{$_}); 286 } 287 288 } 289 290 # Note: Even if there is no actual OCR'd text detected (if test above), 291 # still set metadata that shows that we applied the Google Vision API seeking text 286 292 287 293 my $assoc_json_metaname = "HasGoogleVision"; -
gs3-extensions/structured-image/trunk/src/js/document_extra.js
r37113 r37117 70 70 71 71 var full_text_annotation = gv_ocr_json.fullTextAnnotation; 72 73 if (!full_text_annotation) { 74 // Encountered an entry where no OCR'd text was found 75 return; 76 } 77 72 78 var pages = full_text_annotation.pages; 73 79 var num_pages = pages.length;
Note:
See TracChangeset
for help on using the changeset viewer.