Changeset 37065 for gs3-extensions
- Timestamp:
- 2022-12-28T23:51:47+13:00 (16 months ago)
- Location:
- gs3-extensions/structured-image/trunk/perllib/plugins
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
gs3-extensions/structured-image/trunk/perllib/plugins/GoogleVisionAPIConverter.pm
r37060 r37065 331 331 my $OID = $doc_obj->get_OID(); 332 332 333 my $site = $self->{'site'}; 334 my $collect = my $collect_dir = $ENV{'GSDLCOLLECTION'}; 335 336 # Implication of the following is that the generated openannotation-list JSON content 337 # is bound to the site/collection where it has been imported. 338 # => if renaming a collection at the file system level, then 339 # (i) The versios of openannotation-list*.json in the collections 'cache' dir 340 # need to be removed 341 # (ii) And collection rebuilt 342 343 my $uri_prefix = "http-greenstone://"; 344 $uri_prefix .= "${site}/" if (defined $site) && $site ne ""; # GS3 specific 345 $uri_prefix .= "${collect}/"; 346 347 my $id_uri = "${uri_prefix}${OID}/openannotation-list.json"; 348 333 349 my $openannotation_list = { 334 350 "\@context" => "http://www.shared-canvas.org/ns/context.json", 335 351 # "\@id" => "https://iiif.harvardartmuseums.org/manifests/object/299843/list/47174896", 336 "\@id" => "${OID}/openannotation-list.json", # #### **** make full URL to be unique? or greenstone3:site:collect:OID ??352 "\@id" => $id_uri, 337 353 "\@type" => "sc:AnnotationList", 338 354 "resources" => [] … … 340 356 341 357 $self->{'openannotation-list'} = $openannotation_list; 358 $self->{'openannotation-uri-prefix'} = $uri_prefix; 342 359 } 343 360 … … 386 403 387 404 my $block_i = 0; 405 406 my $uri_prefix = $self->{'openannotation-uri-prefix'}; 388 407 389 408 foreach my $block (@{$gv_blocks}) { 390 409 $block_i++; 391 410 411 my $annotation_id_uri = "${uri_prefix}${OID_with_section}/annotation/gv-block-$block_i"; 412 392 413 my $openannotation_resource = { 393 414 "\@context" => "http://iiif.io/api/presentation/2/context.json", 394 415 # "\@id" => "https://iiif.harvardartmuseums.org/annotations/9641482", 395 "\@id" => "${OID_with_section}/annotation/gv-block-$block_i", # #### **** make full URL to be unique? 
or greenstone3:site:collect:OID ??416 "\@id" => $annotation_id_uri, 396 417 "\@type" => "oa:Annotation", 397 418 "motivation" => [ "oa:commenting" ] … … 404 425 my $bb_x_dim = $bbox_rect->{'x_dim'}; 405 426 my $bb_y_dim = $bbox_rect->{'y_dim'}; 406 407 my $openannotation_on = { 408 "\@type" => "oa:SpecificResource", 427 428 my $canvas_full_uri = "${uri_prefix}${OID}/canvas/$section"; 429 my $manifest_id_uri = "${uri_prefix}${OID_with_section}/manifest"; 430 431 # { 432 # "type": "FragmentSelector", 433 # "value": "xywh=1265,1217,166,205" 434 # }, 435 # { 436 # "type": "SvgSelector", 437 # "value": "<svg xmlns='http://www.w3.org/2000/svg'><path xmlns=\"http://www.w3.org/2000/svg\" d=\"M1265,1422.08859v-205h166v205z\" data-paper-data=\"{"state":null}\" fill=\"none\" fill-rule=\"nonzero\" stroke=\"#00bfff\" stroke-width=\"1\" stroke-linecap=\"butt\" stroke-linejoin=\"miter\" stroke-miterlimit=\"10\" stroke-dasharray=\"\" stroke-dashoffset=\"0\" font-family=\"none\" font-weight=\"none\" font-size=\"none\" text-anchor=\"none\" style=\"mix-blend-mode: normal\"/></svg>" 438 # } 439 440 my $bb_y_org_plus_y_dim = $bb_y_org + $bb_y_dim; 441 my $openannotation_on = [ { 442 "\@type" => "oa:SpecificResource", 409 443 # "full" => "https://iiif.harvardartmuseums.org/manifests/object/299843/canvas/canvas-47174896", 410 "full" => "${OID}/canvas/$section", # doc id + /canvas + page-i/sect # #### **** make full URL to be unique? or greenstone3:site:collect:OID ??444 "full" => $canvas_full_uri, 411 445 "selector" => { 412 "\@type" => "oa:FragmentSelector", 413 "value" => "xywh=${bb_x_org},${bb_y_org},${bb_x_dim},${bb_y_dim}" 414 }, 415 "within" => { 416 #"\@id" => "https://iiif.harvardartmuseums.org/manifests/object/299843", 417 "\@id" => "${OID_with_section}/manifest", # #### **** make full URL to be unique? or greenstone3:site:collect:OID... ?? 
418 "\@type" => "sc:Manifest" 446 "\@type" => "oa:Choice", 447 "default" => { 448 "\@type" => "oa:FragmentSelector", 449 "value" => "xywh=${bb_x_org},${bb_y_org},${bb_x_dim},${bb_y_dim}" 450 }, 451 "item" => { 452 "\@type" => "oa:SvgSelector", 453 "value" => "<svg xmlns='http://www.w3.org/2000/svg'><path xmlns='http://www.w3.org/2000/svg' d='M${bb_x_org},${bb_y_org_plus_y_dim}v-${bb_y_dim}h${bb_x_dim}v${bb_y_dim}z' data-paper-data='{"state":null}' fill='none' fill-rule='nonzero' stroke='#008000' stroke-width='1' stroke-linecap='butt' stroke-linejoin='miter' stroke-miterlimit='10' stroke-dasharray='' stroke-dashoffset='0' font-family='none' font-weight='none' font-size='none' text-anchor='none' style='mix-blend-mode: normal'/></svg>" 454 } 419 455 } 420 }; 456 #"within" => { 457 # #"\@id" => "https://iiif.harvardartmuseums.org/manifests/object/299843", 458 # "\@id" => $manifest_id_uri, 459 # "\@type" => "sc:Manifest" 460 #} 461 } ]; 462 463 # # "on": "http://localhost:8887/coin/canvas#xywh=3706,208,522,522" 464 # my $openannotation_on = "${canvas_full_uri}#xywh=${bb_x_org},${bb_y_org},${bb_x_dim},${bb_y_dim}"; 465 421 466 $openannotation_resource->{'on'} = $openannotation_on; 422 467 … … 439 484 440 485 $block_text_html .= "<p>\n$para_text\n</p>\n\n"; 486 # $block_text_html .= "\n$para_text\n\n"; 441 487 } 442 488 … … 446 492 "format" => "text/html" 447 493 }]; 494 495 496 #"resource": { 497 # "@type": "cnt:ContentAsText", 498 # "format": "text/plain", 499 # "chars": "Zeus seated on stool-throne" 500 #}, 501 502 #my $openannotation_inner_resource = [{ 503 # "\@type" => "cnt:ContentAsText", 504 # "format" => "text/plain", 505 # "chars" => "$block_text_html" 506 #}]; 448 507 449 508 $openannotation_resource->{'resource'} = $openannotation_inner_resource; … … 500 559 501 560 $self->{'openannotation-list'} = undef; 502 561 $self->{'openannotation-uri-prefix'} = undef; 562 503 563 return $ret_status; 504 564 } 565 505 566 506 567 sub openannotation_list_associate_json … 
# Initialise the in-memory W3C Web Annotation list for a document.
#
# Parameters:
#   $doc_obj - document object; only get_OID() is called on it here
#   $section - accepted for symmetry with the other routines, not used
#
# Side effects: stores the (empty) annotation list in
# $self->{'webannotation-list'} and the URI prefix used for every id in
# $self->{'webannotation-uri-prefix'}.
sub start_webannotation_list_INPROGRESS
{
    my $self = shift (@_);
    my ($doc_obj,$section) = @_;

    my $OID = $doc_obj->get_OID();

    my $site    = $self->{'site'};
    my $collect = $ENV{'GSDLCOLLECTION'};
    $collect = "" unless defined $collect;

    # Implication of the following is that the generated webannotation-list JSON content
    # is bound to the site/collection where it has been imported.
    #   => if renaming a collection at the file system level, then
    #       (i) The versions of webannotation-list*.json in the collection's 'cache' dir
    #           need to be removed
    #      (ii) And collection rebuilt

    my $uri_prefix = "http-greenstone://";
    $uri_prefix .= "${site}/" if (defined $site) && $site ne ""; # GS3 specific
    $uri_prefix .= "${collect}/";

    my $id_uri = "${uri_prefix}${OID}/webannotation-list.json";

    my $webannotation_list = {
        "\@context"  => "http://www.shared-canvas.org/ns/context.json",
        "\@id"       => $id_uri,
        "\@type"     => "sc:AnnotationList",
        "resources"  => []
    };

    $self->{'webannotation-list'}       = $webannotation_list;
    $self->{'webannotation-uri-prefix'} = $uri_prefix;
}


# Convert Google Vision OCR blocks into annotation resources and append them
# to the 'resources' array of $self->{'webannotation-list'}.
#
# Parameters:
#   $gv_blocks - array ref of OCR blocks; each block is read as
#                {paragraphs}[]{words}[]{symbols}[]{text}
#   $doc_obj   - document object; only get_OID() is called on it here
#   $section   - section id; empty or undefined is treated as section 1
#                (single-image document)
#
# Requires start_webannotation_list_INPROGRESS() to have been called first,
# as it reads the list and URI prefix stored on $self.
sub convert_gvocr_to_webannotation_resource_INPROGRESS
{
    my $self = shift (@_);
    my ($gv_blocks, $doc_obj, $section) = @_;

    # Callers may omit the section altogether; treat that like the empty section
    $section = "" unless defined $section;

    my $OID = $doc_obj->get_OID();
    my $OID_with_section = ($section ne "") ? "${OID}_$section" : $OID;
    $section = 1 if ($section eq ""); # occurs when the document is a single image

    # Details on difference between OpenAnnotation and WebAnnotation covered at
    #   https://www.google.com/search?q=iiif+simpleannotationserver&sxsrf=ALiCzsbIpm1YO0SYE9sCXBQ231_oyEmopw:1672137985013&source=lnms&tbm=vid&sa=X&ved=2ahUKEwizu_K0z5n8AhXF1DgGHQ7FCb4Q_AUoA3oECAEQBQ&biw=1536&bih=742&dpr=1.25#fpstate=ive&vld=cid:07a4e9d9,vid:gFNWWIe5QpM

    my $self_webannotation_resources = $self->{'webannotation-list'}->{'resources'};

    my $uri_prefix = $self->{'webannotation-uri-prefix'};

    # These two URIs do not depend on the block, so build them once
    my $canvas_full_uri = "${uri_prefix}${OID}/canvas/$section";
    my $manifest_id_uri = "${uri_prefix}${OID_with_section}/manifest";

    my @blocks = (defined $gv_blocks) ? @{$gv_blocks} : ();

    my $block_i = 0;

    foreach my $block (@blocks) {
        $block_i++;

        my $annotation_id_uri = "${uri_prefix}${OID_with_section}/annotation/gv-block-$block_i";

        my $webannotation_resource = {
            "\@context"  => "http://iiif.io/api/presentation/2/context.json",
            "id"         => $annotation_id_uri,
            "type"       => "Annotation",
            "motivation" => [ "commenting" ]
        };

        my $bbox_rect = $self->gv_ocr_bounding_box_rect($block);
        my $bb_x_org = $bbox_rect->{'x_org'};
        my $bb_y_org = $bbox_rect->{'y_org'};
        my $bb_x_dim = $bbox_rect->{'x_dim'};
        my $bb_y_dim = $bbox_rect->{'y_dim'};

        # TODO: Needs updating -- see how 'openannotation_on' is built in the
        # OpenAnnotation version of this routine
        my $webannotation_target = [ {
            "type"     => "oa:SpecificResource",
            "full"     => $canvas_full_uri,
            "selector" => {
                "type"  => "oa:FragmentSelector",
                "value" => "xywh=${bb_x_org},${bb_y_org},${bb_x_dim},${bb_y_dim}"
            },
            "within"   => {
                "id"   => $manifest_id_uri,
                "type" => "sc:Manifest"
            }
        } ];
        $webannotation_resource->{'target'} = $webannotation_target;

        # Rebuild the block's text: symbols concatenate into words, words are
        # space separated within a paragraph, each paragraph becomes a <p>
        my $block_text_html = "";

        foreach my $paragraph (@{$block->{'paragraphs'}}) {
            my $para_text = "";

            foreach my $word (@{$paragraph->{'words'}}) {
                my $word_text = "";

                foreach my $letter (@{$word->{'symbols'}}) {
                    $word_text .= $letter->{'text'};
                }

                $para_text .= " " if $para_text ne "";
                $para_text .= $word_text;
            }

            $block_text_html .= "<p>\n$para_text\n</p>\n\n";
        }

        my $webannotation_body = [{
            "type"   => "TextualBody",
            "chars"  => "$block_text_html",
            "format" => "text/html"
        }];

        $webannotation_resource->{'body'} = $webannotation_body;

        push(@$self_webannotation_resources,$webannotation_resource);
    }
}


# Read a Google Vision document-OCR JSON file and append its blocks (taken
# from the first page of 'fullTextAnnotation') to the annotation list.
#
# Parameters:
#   $gv_dococr_json_filename - path of the JSON file to read (decoded as UTF-8)
#   $doc_obj, $section       - passed through to
#                              convert_gvocr_to_webannotation_resource_INPROGRESS()
#
# Dies if the file cannot be opened.
sub convert_and_append_webannotation_resources_INPROGRESS
{
    my $self = shift (@_);
    my ($gv_dococr_json_filename, $doc_obj, $section) = @_;

    # Read in JSON file
    my $json_text = do {
        open(my $json_fh, "<:encoding(UTF-8)", $gv_dococr_json_filename)
            or die("Can't open \"$gv_dococr_json_filename\": $!\n");
        local $/;
        <$json_fh>
    };

    my $decoded_json = JSON::from_json($json_text);

    my $gv_blocks = $decoded_json->{'fullTextAnnotation'}->{'pages'}->[0]->{'blocks'};

    $self->convert_gvocr_to_webannotation_resource_INPROGRESS($gv_blocks, $doc_obj, $section);
}


# Serialise the accumulated annotation list to $json_ofilename and reset the
# per-document state held on $self.
#
# Parameters:
#   $doc_obj        - not used here
#   $json_ofilename - path of the JSON file to (over)write
#
# Returns 1 on success, 0 if the file could not be opened, written or closed.
sub end_webannotation_list_INPROGRESS
{
    my $self = shift (@_);
    my ($doc_obj,$json_ofilename) = @_;

    my $ret_status = 1;

    # JSON::encode_json() already returns UTF-8 encoded bytes, so the file is
    # opened in raw mode: pushing those bytes through a ':utf8' layer would
    # encode any non-ASCII character a second time.
    my $json_fh;
    if (!open($json_fh, ">:raw", $json_ofilename)) {
        print STDERR "Error: Failed to save Web Annotation List JSON to \"$json_ofilename\":\n $!\n";
        $ret_status = 0;
    }
    else {
        my $webannotation_list = $self->{'webannotation-list'};
        my $webannotation_list_json_text = JSON::encode_json($webannotation_list);

        my $printed_ok = print {$json_fh} $webannotation_list_json_text;
        my $closed_ok  = close($json_fh); # buffered write errors surface here

        if (!$printed_ok || !$closed_ok) {
            print STDERR "Error: Failed to save Web Annotation List JSON to \"$json_ofilename\":\n $!\n";
            $ret_status = 0;
        }
    }

    $self->{'webannotation-list'} = undef;
    $self->{'webannotation-uri-prefix'} = undef;

    return $ret_status;
}


# For every Google Vision OCR JSON record, make sure an up-to-date
# webannotation-list<section>.json exists in the cache directory and
# associate it with the top section of the document.
#
# Parameters:
#   $doc_obj                      - document object the files are attached to
#   $gv_dococr_json_filename_recs - array ref of { filename => ..., section => ... }
#
# Returns 1 if every list was saved (or was already cached), 0 otherwise.
sub webannotation_list_associate_json_INPROGRESS
{
    my $self = shift (@_);
    my ($doc_obj, $gv_dococr_json_filename_recs) = @_;

    my $outhandle = $self->{'outhandle'};

    my $all_saved_ok = 1;

    # Nothing to do; also avoids autovivifying element [0] of an empty list below
    return $all_saved_ok unless @$gv_dococr_json_filename_recs;

    # The cache location is derived from the first record's filename, so all
    # sections of the document share one cache directory
    my ($gv_dococr_filename_root) = ($gv_dococr_json_filename_recs->[0]->{'filename'} =~ m/^(.+)\.json$/);

    # sleight of hand so new directory spot in cache_dir picked out is where we want it!
    $gv_dococr_filename_root .= "/";

    for my $gv_json_filename_rec (@$gv_dococr_json_filename_recs) {
        my $gv_json_filename = $gv_json_filename_rec->{'filename'};
        my $section          = $gv_json_filename_rec->{'section'};

        $self->init_cache_for_file($gv_dococr_filename_root);
        my $cached_dir = $self->{'cached_dir'};

        my $assoc_webannotation_json_ofile = "webannotation-list${section}.json";
        my $cached_webannotation_json_ofilename = &FileUtils::filenameConcatenate($cached_dir,$assoc_webannotation_json_ofile);

        my $needs_json_regen = 0;

        if (!-f $cached_webannotation_json_ofilename) {
            $needs_json_regen = 1;
        }
        else {
            # -M is the file's age in days, so a SMALLER value means a more
            # recently modified file: regenerate when the OCR JSON is newer
            # than the cached annotation list
            if ((-M $gv_json_filename) < (-M $cached_webannotation_json_ofilename)) {
                $needs_json_regen = 1;
            }
        }

        my $saved_ok = 1;

        if ($needs_json_regen) {

            print $outhandle "  WebAnnotation-List: Generating $cached_webannotation_json_ofilename\n";

            $self->start_webannotation_list_INPROGRESS($doc_obj);
            $self->convert_and_append_webannotation_resources_INPROGRESS($gv_json_filename, $doc_obj,$section);

            $saved_ok = $self->end_webannotation_list_INPROGRESS($doc_obj,$cached_webannotation_json_ofilename);
        }
        else {
            print $outhandle "  WebAnnotation-List: Cached file $cached_webannotation_json_ofilename already exists\n";
        }

        if ($saved_ok) {
            my $top_section = $doc_obj->get_top_section();
            $doc_obj->associate_file($cached_webannotation_json_ofilename,$assoc_webannotation_json_ofile,"application/json",$top_section);
        }
        else {
            $all_saved_ok = 0;
        }
    }

    return $all_saved_ok;
}


# Generate and associate the Web Annotation lists for a document, if any
# Google Vision OCR JSON files were recorded for it in
# $self->{'gv-dococr-json-filename-recs'}.
#
# Returns 1 when there was nothing to do or everything was saved, otherwise
# the (false) status from webannotation_list_associate_json_INPROGRESS().
sub opt_run_gen_webannotation_INPROGRESS
{
    my $self = shift (@_);
    my ($doc_obj) = @_;

    my $gv_dococr_json_filename_recs = $self->{'gv-dococr-json-filename-recs'};
    my $num_gv_dococr_json_filename_recs
        = (defined $gv_dococr_json_filename_recs) ? scalar(@$gv_dococr_json_filename_recs) : 0;

    my $ret_val_ok = 1;

    if ($num_gv_dococr_json_filename_recs > 0) {
        # The worker sub carries the _INPROGRESS suffix like its siblings
        $ret_val_ok = $self->webannotation_list_associate_json_INPROGRESS($doc_obj,$gv_dococr_json_filename_recs);
    }

    return $ret_val_ok;
}


1;
gs3-extensions/structured-image/trunk/perllib/plugins/GoogleVisionImagePlugin.pm
r37046 r37065 85 85 $self->{'gv-dococr-json-filename-recs'} = []; 86 86 87 # The following also builds up the openanotation_list 'resources' array88 87 $self->run_gv_convert($filename_full_path,$url_encoded_filename,$doc_obj); 89 88 … … 99 98 if ($ret_val_ok) { 100 99 $ret_val_ok = $self->opt_run_gen_openannotation($doc_obj); 100 #$ret_val_ok = $self->opt_run_gen_webannotation($doc_obj); 101 101 } 102 102 -
gs3-extensions/structured-image/trunk/perllib/plugins/GoogleVisionPagedImagePlugin.pm
r37046 r37065 119 119 if ($ret_val_ok) { 120 120 $ret_val_ok = $self->opt_run_gen_openannotation($doc_obj); 121 #$ret_val_ok = $self->opt_run_gen_webannotation($doc_obj); 121 122 } 122 123
Note:
See TracChangeset
for help on using the changeset viewer.