Changeset 10168
- Timestamp: 2005-06-24T12:16:01+12:00 (19 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/plugins/PagedImgPlug.pm
r10153 r10168 34 34 # document/book. 35 35 # 36 #There are two formats for the item files: a plain text format, and an xml 37 #format. You can use either format, and can have both formats in the same 38 #collection if you like. If you use the plain format, you must not start the 39 #file off with <PagedDocument> 40 41 #### PLAIN FORMAT 36 42 # The format of the xxx.item file is as follows: 37 43 # The first lines contain any metadata for the whole document … … 60 66 # should be rotated. 61 67 # 68 69 #### XML FORMAT 70 # The xml format looks like the following 71 #<PagedDocument> 72 #<Metadata name="Title">The Title of the entire document</Metadata> 73 #<Page pagenum="1" imgfile="xxx.jpg" txtfile="yyy.jpg"> 74 #<Metadata name="Title">The Title of this page</Metadata> 75 #</Page> 76 #... more pages 77 #</PagedDocument> 78 #PagedDocument contains a list of Pages, Metadata and PageGroups. Any metadata 79 #that is not inside another tag will belong to the document. 80 #Each Page has a pagenum (not used at the moment), an imgfile and/or a txtfile. 81 #These are both optional - if neither is used, the section will have no content. 82 #Pages can also have metadata associated with them. 83 #PageGroups can be introduced at any point - they can contain Metadata and Pages and other PageGroups. They are used to introduce hierarchical structure into the document. 84 #For example 85 #<PagedDocument> 86 #<PageGroup> 87 #<Page> 88 #<Page> 89 #</PageGroup> 90 #<Page> 91 #</PagedDocument> 92 #would generate a structure like 93 #X 94 #--X 95 # --X 96 # --X 97 #--X 98 #PageGroup tags can also have imgfile/textfile metadata if you like - this way they get some content themselves. 99 100 #Currently the XML structure doesn't work very well with the paged document type, unless you use numerical Titles for each section. 
101 #There is still a bit of work to do on this format: 102 #* enable other text file types, eg html, pdf etc 103 #* make the document paging work properly 104 #* add pagenum as Title unless a Title is present? 105 62 106 # All the supplemetary image amd text files should be in the same folder as 63 107 # the .item file. … … 86 130 # Additional metadata can be added into the .item files, alternatively you can 87 131 # use normal metadata.xml files, with the name of the xxx.item file as the 88 # FileName .132 # FileName (only for document level metadata). 89 133 90 134 package PagedImgPlug; 91 135 92 use BasPlug;136 use XMLPlug; 93 137 94 138 sub BEGIN { 95 @ISA = (' BasPlug');139 @ISA = ('XMLPlug'); 96 140 } 97 141 … … 175 219 'args' => $arguments }; 176 220 177 178 221 sub new { 179 222 my ($class) = @_; 180 223 my $plugin_name = shift (@_); 181 my $self = new BasPlug ("PagedImgPlug", @_);224 $self = new XMLPlug ("PagedImgPlug", @_); 182 225 183 226 my $option_list = $self->{'option_list'}; … … 334 377 } else { 335 378 $doc_obj->add_metadata ($section, "srclink", 336 "<a href=\"_httpcollection_/index/assoc/[parent :assocfilepath]/[Image]\">");337 $doc_obj->add_metadata ($section, "srcicon", "<img src=\"_httpcollection_/index/assoc/[parent :assocfilepath]/[Image]\">");379 "<a href=\"_httpcollection_/index/assoc/[parent(Top):assocfilepath]/[Image]\">"); 380 $doc_obj->add_metadata ($section, "srcicon", "<img src=\"_httpcollection_/index/assoc/[parent(Top):assocfilepath]/[Image]\">"); 338 381 339 382 } … … 373 416 $doc_obj->add_metadata ($section, "ThumbType", $thumbnailtype); 374 417 $doc_obj->add_metadata ($section, "Thumb", $id."thumb.$thumbnailtype"); 375 376 $doc_obj->add_metadata ($section, "thumbicon", "<img src=\"_httpcollection_/index/assoc/[parent:assocfilepath]/[Thumb]\" width=[ThumbWidth] height=[ThumbHeight]>"); 418 if ($top) { 419 $doc_obj->add_metadata ($section, "thumbicon", "<img src=\"_httpcollection_/index/assoc/[assocfilepath]/[Thumb]\" 
width=[ThumbWidth] height=[ThumbHeight]>"); 420 } else { 421 $doc_obj->add_metadata ($section, "thumbicon", "<img src=\"_httpcollection_/index/assoc/[parent(Top):assocfilepath]/[Thumb]\" width=[ThumbWidth] height=[ThumbHeight]>"); 422 } 377 423 } 378 424 … … 430 476 $doc_obj->add_metadata ($section, "screenicon", "<img src=\"_httpcollection_/index/assoc/[assocfilepath]/[Screen]\" width=[ScreenWidth] height=[ScreenHeight]>"); 431 477 } else { 432 $doc_obj->add_metadata ($section, "screenicon", "<img src=\"_httpcollection_/index/assoc/ {If}{[parent:assocfilepath],[parent:assocfilepath],[assocfilepath]}/[Screen]\" width=[ScreenWidth] height=[ScreenHeight]>");478 $doc_obj->add_metadata ($section, "screenicon", "<img src=\"_httpcollection_/index/assoc/[parent(Top):assocfilepath]/[Screen]\" width=[ScreenWidth] height=[ScreenHeight]>"); 433 479 434 480 } … … 500 546 501 547 sub read { 502 my$self = shift (@_);548 $self = shift (@_); 503 549 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 504 505 550 my $outhandle = $self->{'outhandle'}; 506 551 my $smart_block = $self->{'smart_block'}; … … 523 568 return 0; # blocked 524 569 } 525 570 526 571 if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) { 527 572 return undef; … … 532 577 print STDERR "<Processing n='$file' p='PagedImgPlug'>\n" if ($gli); 533 578 534 my ($dir); 535 ($dir, $file) = $filename =~ /^(.*?)([^\/\\]*)$/; 536 537 #process the .item file 538 my $doc_obj = $self->process_item($filename, $dir, $file, $processor); 539 579 # here we need to decide if we have an old text .item file, or a new xml 580 # .item file - for now the test is if the first non-empty line is 581 # <PagedDocument> then its xml 582 my $xml_version = 0; 583 open (ITEMFILE, $filename) || die "couldn't open $filename\n"; 584 my $line = ""; 585 my $num = 0; 586 $line = <ITEMFILE>; 587 while ($line !~ /\w/) { 588 $line = <ITEMFILE>; 589 } 590 chomp $line; 591 if ($line =~ /^<PagedDocument/) { 
592 $xml_version = 1; 593 } 594 close ITEMFILE; 595 my $doc_obj; 596 if ($xml_version) { 597 598 $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up 599 $self->{'file'} = $file; 600 $self->{'filename'} = $filename; 601 $self->{'processor'} = $processor; 602 $self->{'metadata'} = $metadata; 603 $self->{'gli'} = $gli; 604 eval { 605 $@ = ""; 606 my $xslt = $self->{'xslt'}; 607 if (defined $xslt && ($xslt ne "")) { 608 # perform xslt 609 my $transformed_xml = $self->apply_xslt($xslt,$filename); 610 611 # feed transformed file (now in memory as string) into XML parser 612 #$self->{'parser'}->parse($transformed_xml); 613 $self->parse_string($transformed_xml); 614 } 615 else { 616 #$self->{'parser'}->parsefile($filename); 617 $self->parse_file($filename); 618 } 619 }; 620 621 if ($@) { 622 623 # parsefile may either croak somewhere in XML::Parser (e.g. because 624 # the document is not well formed) or die somewhere in XMLPlug or a 625 # derived plugin (e.g. because we're attempting to process a 626 # document whose DOCTYPE is not meant for this plugin). 
For the 627 # first case we'll print a warning and continue, for the second 628 # we'll just continue quietly 629 630 print STDERR "**** XML Parse Error is: $@\n"; 631 632 my ($msg) = $@ =~ /Carp::croak\(\'(.*?)\'\)/; 633 if (defined $msg) { 634 my $outhandle = $self->{'outhandle'}; 635 my $plugin_name = ref ($self); 636 print $outhandle "$plugin_name failed to process $file ($msg)\n"; 637 } 638 639 # reset ourself for the next document 640 $self->{'section_level'}=0; 641 print STDERR "<ProcessingError n='$file'>\n" if ($gli); 642 return -1; # error during processing 643 } 644 $doc_obj = $self->{'doc_obj'}; 645 646 } else { 647 my ($dir); 648 ($dir, $file) = $filename =~ /^(.*?)([^\/\\]*)$/; 649 650 #process the .item file 651 $doc_obj = $self->process_item($filename, $dir, $file, $processor); 652 653 } 654 540 655 if ($self->{'cover_image'}) { 541 656 $self->associate_cover_image($doc_obj, $filename); … … 578 693 579 694 return 1; 695 } 696 697 sub xml_start_tag { 698 my $self = shift(@_); 699 my ($expat, $element) = @_; 700 $self->{'element'} = $element; 701 702 my $doc_obj = $self->{'doc_obj'}; 703 if ($element eq "PagedDocument") { 704 $self->{'current_section'} = $doc_obj->get_top_section(); 705 } elsif ($element eq "PageGroup" || $element eq "Page") { 706 # create a new section as a child 707 $self->{'current_section'} = $doc_obj->insert_section($doc_obj->get_end_child($self->{'current_section'})); 708 $self->{'num_pages'}++; 709 # assign pagenum as what?? 710 my $pagenum = $_{'pagenum'}; #TODO!! 
711 $doc_obj->set_utf8_metadata_element($self->{'current_section'}, 'PageNum', $pagenum); 712 my ($imgfile) = $_{'imgfile'}; 713 if (defined $imgfile) { 714 $self->process_image($self->{'base_dir'}.$imgfile, $imgfile, $doc_obj, $self->{'current_section'}); 715 } 716 my ($txtfile) = $_{'txtfile'}; 717 if (defined($txtfile)) { 718 $self->process_text ($self->{'base_dir'}.$txtfile, $txtfile, $doc_obj, $self->{'current_section'}); 719 } else { 720 # otherwise add in some dummy text 721 $doc_obj->add_text($self->{'current_section'}, &gsprintf::lookup_string("{BasPlug.dummy_text}")); 722 } 723 } elsif ($element eq "Metadata") { 724 $self->{'metadata_name'} = $_{'name'}; 725 } 726 } 727 728 sub xml_end_tag { 729 my $self = shift(@_); 730 my ($expat, $element) = @_; 731 732 my $doc_obj = $self->{'doc_obj'}; 733 if ($element eq "Page" || $element eq "PageGroup") { 734 # move the current section back to the parent 735 $self->{'current_section'} = $doc_obj->get_parent_section($self->{'current_section'}); 736 } elsif ($element eq "Metadata") { 737 738 $doc_obj->add_utf8_metadata ($self->{'current_section'}, $self->{'metadata_name'}, $self->{'metadata_value'}); 739 $self->{'metadata_name'} = ""; 740 $self->{'metadata_value'} = ""; 741 742 } 743 # otherwise we ignore the end tag 744 } 745 746 747 sub xml_text { 748 my $self = shift(@_); 749 my ($expat) = @_; 750 751 if ($self->{'element'} eq "Metadata") { 752 $self->{'metadata_value'} .= $_; 753 } 754 } 755 756 sub xml_doctype { 757 } 758 759 sub open_document { 760 my $self = shift(@_); 761 762 # create a new document 763 $self->{'doc_obj'} = new doc ($self->{'filename'}, "indexed_doc"); 764 my $doc_obj = $self->{'doc_obj'}; 765 $doc_obj->set_OIDtype ($self->{'processor'}->{'OIDtype'}); 766 my ($dir, $file) = $self->{'filename'} =~ /^(.*?)([^\/\\]*)$/; 767 $self->{'base_dir'} = $dir; 768 $self->{'num_pages'} = 0; 769 my $topsection = $doc_obj->get_top_section(); 770 if ($self->{'doctype'} eq 'paged') { 771 # set the 
gsdlthistype metadata to Paged - this ensures this document will 772 # be treated as a Paged doc, even if Titles are not numeric 773 774 $doc_obj->set_utf8_metadata_element ($topsection, "gsdlthistype", "Paged"); 775 } else { 776 $doc_obj->set_utf8_metadata_element ($topsection, "gsdlthistype", "Hierarchy"); 777 } 778 779 $doc_obj->add_metadata ($topsection, "Source", $file); 780 if ($self->{'headerpage'}) { 781 $doc_obj->add_text($topsection, &gsprintf::lookup_string("{BasPlug.dummy_text}")); 782 } 783 784 } 785 786 sub close_document { 787 my $self = shift(@_); 788 my $doc_obj = $self->{'doc_obj'}; 789 790 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}"); 791 $doc_obj->add_metadata($doc_obj->get_top_section(), "FileFormat", "PagedImg"); 792 793 # add numpages metadata 794 $doc_obj->set_utf8_metadata_element ($doc_obj->get_top_section(), 'NumPages', $self->{'num_pages'}); 795 796 # add an OID 797 $doc_obj->set_OID(); 798 580 799 } 581 800
Note: See TracChangeset for help on using the changeset viewer.