Changeset 10168


Ignore:
Timestamp:
2005-06-24T12:16:01+12:00 (19 years ago)
Author:
kjdon
Message:

modified this to use a new xml format. it should work as before on the old format. now inherits from XMLPlug.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/PagedImgPlug.pm

    r10153 r10168  
    3434# document/book.
    3535#
     36#There are two formats for the item files: a plain text format, and an xml
     37#format. You can use either format, and can have both formats in the same
     38#collection if you like. If you use the plain format, you must not start the
     39#file off with <PagedDocument>
     40
     41#### PLAIN FORMAT
    3642# The format of the xxx.item file is as follows:
    3743# The first lines contain any metadata for the whole document
     
    6066# should be rotated.
    6167#
     68
     69#### XML FORMAT
     70# The xml format looks like the following
     71#<PagedDocument>
     72#<Metadata name="Title">The Title of the entire document</Metadata>
     73#<Page pagenum="1" imgfile="xxx.jpg" txtfile="yyy.txt">
     74#<Metadata name="Title">The Title of this page</Metadata>
     75#</Page>
     76#... more pages
     77#</PagedDocument>
     78#PagedDocument contains a list of Pages, Metadata and PageGroups. Any metadata
     79#that is not inside another tag will belong to the document.
     80#Each Page has a pagenum (not used at the moment), an imgfile and/or a txtfile.
     81#These are both optional - if neither is used, the section will have no content.
     82#Pages can also have metadata associated with them.
     83#PageGroups can be introduced at any point - they can contain Metadata and Pages and other PageGroups. They are used to introduce hierarchical structure into the document.
     84#For example
     85#<PagedDocument>
     86#<PageGroup>
     87#<Page>
     88#<Page>
     89#</PageGroup>
     90#<Page>
     91#</PagedDocument>
     92#would generate a structure like
     93#X
     94#--X
     95#  --X
     96#  --X
     97#--X
     98#PageGroup tags can also have imgfile/txtfile attributes if you like - this way they get some content themselves.
     99
     100#Currently the XML structure doesn't work very well with the paged document type, unless you use numerical Titles for each section.
     101#There is still a bit of work to do on this format:
     102#* enable other text file types, eg html, pdf etc
     103#* make the document paging work properly
     104#* add pagenum as Title unless a Title is present?
     105
    62106# All the supplementary image and text files should be in the same folder as
    63107# the .item file.
     
    86130# Additional metadata can be added into the .item files, alternatively you can
    87131# use normal metadata.xml files, with the name of the xxx.item file as the
    88 # FileName.
     132# FileName (only for document level metadata).
    89133
    90134package PagedImgPlug;
    91135
    92 use BasPlug;
     136use XMLPlug;
    93137
    94138sub BEGIN {
    95     @ISA = ('BasPlug');
     139    @ISA = ('XMLPlug');
    96140}
    97141
     
    175219        'args'     => $arguments };
    176220
    177 
    178221sub new {
    179222    my ($class) = @_;
    180223    my $plugin_name = shift (@_);
    181     my $self = new BasPlug ("PagedImgPlug", @_);
     224    $self = new XMLPlug ("PagedImgPlug", @_);
    182225
    183226    my $option_list = $self->{'option_list'};
     
    334377    } else {
    335378    $doc_obj->add_metadata ($section, "srclink",
    336                 "<a href=\"_httpcollection_/index/assoc/[parent:assocfilepath]/[Image]\">");
    337     $doc_obj->add_metadata ($section, "srcicon", "<img src=\"_httpcollection_/index/assoc/[parent:assocfilepath]/[Image]\">");
     379                "<a href=\"_httpcollection_/index/assoc/[parent(Top):assocfilepath]/[Image]\">");
     380    $doc_obj->add_metadata ($section, "srcicon", "<img src=\"_httpcollection_/index/assoc/[parent(Top):assocfilepath]/[Image]\">");
    338381
    339382    }
     
    373416        $doc_obj->add_metadata ($section, "ThumbType", $thumbnailtype);
    374417        $doc_obj->add_metadata ($section, "Thumb", $id."thumb.$thumbnailtype");
    375        
    376         $doc_obj->add_metadata ($section, "thumbicon", "<img src=\"_httpcollection_/index/assoc/[parent:assocfilepath]/[Thumb]\" width=[ThumbWidth] height=[ThumbHeight]>");
     418        if ($top) {
     419        $doc_obj->add_metadata ($section, "thumbicon", "<img src=\"_httpcollection_/index/assoc/[assocfilepath]/[Thumb]\" width=[ThumbWidth] height=[ThumbHeight]>");
     420        } else {
     421        $doc_obj->add_metadata ($section, "thumbicon", "<img src=\"_httpcollection_/index/assoc/[parent(Top):assocfilepath]/[Thumb]\" width=[ThumbWidth] height=[ThumbHeight]>");
     422        }
    377423    }
    378424   
     
    430476        $doc_obj->add_metadata ($section, "screenicon", "<img src=\"_httpcollection_/index/assoc/[assocfilepath]/[Screen]\" width=[ScreenWidth] height=[ScreenHeight]>");
    431477        } else {
    432         $doc_obj->add_metadata ($section, "screenicon", "<img src=\"_httpcollection_/index/assoc/{If}{[parent:assocfilepath],[parent:assocfilepath],[assocfilepath]}/[Screen]\" width=[ScreenWidth] height=[ScreenHeight]>");
     478        $doc_obj->add_metadata ($section, "screenicon", "<img src=\"_httpcollection_/index/assoc/[parent(Top):assocfilepath]/[Screen]\" width=[ScreenWidth] height=[ScreenHeight]>");
    433479
    434480        }
     
    500546
    501547sub read {
    502     my $self = shift (@_);
     548    $self = shift (@_);
    503549    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    504 
    505550    my $outhandle   = $self->{'outhandle'};
    506551    my $smart_block = $self->{'smart_block'};
     
    523568    return 0; # blocked
    524569    }
    525 
     570   
    526571    if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) {
    527572    return undef;
     
    532577    print STDERR "<Processing n='$file' p='PagedImgPlug'>\n" if ($gli);
    533578
    534     my ($dir);
    535     ($dir, $file) = $filename =~ /^(.*?)([^\/\\]*)$/;
    536 
    537     #process the .item file
    538     my $doc_obj = $self->process_item($filename, $dir, $file, $processor);
    539 
     579    # here we need to decide if we have an old text .item file, or a new xml
     580    # .item file - for now the test is if the first non-empty line is
     581    # <PagedDocument> then it's XML
     582    my $xml_version = 0;
     583    open (ITEMFILE, $filename) || die "couldn't open $filename\n";
     584    my $line = "";
     585    my $num = 0;
     586    $line = <ITEMFILE>;
     587    while ($line !~ /\w/) {
     588    $line = <ITEMFILE>;
     589    }
     590    chomp $line;
     591    if ($line =~ /^<PagedDocument/) {
     592    $xml_version = 1;
     593    }
     594    close ITEMFILE;
     595    my $doc_obj;
     596    if ($xml_version) {
     597
     598    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
     599    $self->{'file'} = $file;
     600    $self->{'filename'} = $filename;
     601    $self->{'processor'} = $processor;
     602    $self->{'metadata'} = $metadata;
     603    $self->{'gli'} = $gli;
     604    eval {
     605        $@ = "";
     606        my $xslt = $self->{'xslt'};
     607        if (defined $xslt && ($xslt ne "")) {
     608        # perform xslt
     609        my $transformed_xml = $self->apply_xslt($xslt,$filename);
     610       
     611        # feed transformed file (now in memory as string) into XML parser
     612        #$self->{'parser'}->parse($transformed_xml);
     613        $self->parse_string($transformed_xml);
     614        }
     615        else {
     616        #$self->{'parser'}->parsefile($filename);
     617        $self->parse_file($filename);
     618        }
     619    };
     620   
     621    if ($@) {
     622       
     623        # parsefile may either croak somewhere in XML::Parser (e.g. because
     624        # the document is not well formed) or die somewhere in XMLPlug or a
     625        # derived plugin (e.g. because we're attempting to process a
     626        # document whose DOCTYPE is not meant for this plugin). For the
     627        # first case we'll print a warning and continue, for the second
     628        # we'll just continue quietly
     629       
     630        print STDERR "**** XML Parse Error is: $@\n";
     631       
     632        my ($msg) = $@ =~ /Carp::croak\(\'(.*?)\'\)/;
     633        if (defined $msg) {
     634        my $outhandle = $self->{'outhandle'};
     635        my $plugin_name = ref ($self);
     636        print $outhandle "$plugin_name failed to process $file ($msg)\n";
     637        }
     638
     639        # reset ourself for the next document
     640        $self->{'section_level'}=0;
     641        print STDERR "<ProcessingError n='$file'>\n" if ($gli);
     642        return -1; # error during processing
     643    }
     644    $doc_obj = $self->{'doc_obj'};
     645
     646    } else {
     647    my ($dir);
     648    ($dir, $file) = $filename =~ /^(.*?)([^\/\\]*)$/;
     649
     650    #process the .item file
     651    $doc_obj = $self->process_item($filename, $dir, $file, $processor);
     652   
     653    }
     654   
    540655    if ($self->{'cover_image'}) {
    541656    $self->associate_cover_image($doc_obj, $filename);
     
    578693
    579694    return 1;
     695}
     696
     697sub xml_start_tag {
     698    my $self = shift(@_);
     699    my ($expat, $element) = @_;
     700    $self->{'element'} = $element;
     701   
     702    my $doc_obj = $self->{'doc_obj'};
     703    if ($element eq "PagedDocument") {
     704    $self->{'current_section'} = $doc_obj->get_top_section();
     705    } elsif ($element eq "PageGroup" || $element eq "Page") {
     706    # create a new section as a child
     707    $self->{'current_section'} = $doc_obj->insert_section($doc_obj->get_end_child($self->{'current_section'}));
     708    $self->{'num_pages'}++;
     709    # assign pagenum as  what??
     710    my $pagenum = $_{'pagenum'}; #TODO!!
     711    $doc_obj->set_utf8_metadata_element($self->{'current_section'}, 'PageNum', $pagenum);
     712    my ($imgfile) = $_{'imgfile'};
     713    if (defined $imgfile) {
     714        $self->process_image($self->{'base_dir'}.$imgfile, $imgfile, $doc_obj, $self->{'current_section'});
     715    }
     716    my ($txtfile) = $_{'txtfile'};
     717    if (defined($txtfile)) {
     718        $self->process_text ($self->{'base_dir'}.$txtfile, $txtfile, $doc_obj, $self->{'current_section'});
     719    } else {
     720        # otherwise add in some dummy text
     721        $doc_obj->add_text($self->{'current_section'}, &gsprintf::lookup_string("{BasPlug.dummy_text}"));
     722    }
     723    } elsif ($element eq "Metadata") {
     724    $self->{'metadata_name'} = $_{'name'};
     725    }
     726}
     727
     728sub xml_end_tag {
     729    my $self = shift(@_);
     730    my ($expat, $element) = @_;
     731   
     732    my $doc_obj = $self->{'doc_obj'};
     733    if ($element eq "Page" || $element eq "PageGroup") {
     734    # move the current section back to the parent
     735    $self->{'current_section'} = $doc_obj->get_parent_section($self->{'current_section'});
     736    } elsif ($element eq "Metadata") {
     737   
     738    $doc_obj->add_utf8_metadata ($self->{'current_section'}, $self->{'metadata_name'}, $self->{'metadata_value'});
     739    $self->{'metadata_name'} = "";
     740    $self->{'metadata_value'} = "";
     741
     742    }
     743    # otherwise we ignore the end tag
     744}
     745
     746
     747sub xml_text {
     748    my $self = shift(@_);
     749    my ($expat) = @_;
     750
     751    if ($self->{'element'} eq "Metadata") {
     752    $self->{'metadata_value'} .= $_;
     753    }
     754}
     755
     756sub xml_doctype {
     757}
     758
     759sub open_document {
     760    my $self = shift(@_);
     761   
     762    # create a new document
     763    $self->{'doc_obj'} = new doc ($self->{'filename'}, "indexed_doc");
     764    my $doc_obj = $self->{'doc_obj'};
     765    $doc_obj->set_OIDtype ($self->{'processor'}->{'OIDtype'});
     766    my ($dir, $file) = $self->{'filename'} =~ /^(.*?)([^\/\\]*)$/;
     767    $self->{'base_dir'} = $dir;
     768    $self->{'num_pages'} = 0;
     769    my $topsection = $doc_obj->get_top_section();
     770    if ($self->{'doctype'} eq 'paged') {
     771    # set the gsdlthistype metadata to Paged - this ensures this document will
     772    # be treated as a Paged doc, even if Titles are not numeric
     773   
     774    $doc_obj->set_utf8_metadata_element ($topsection, "gsdlthistype", "Paged");
     775    } else {
     776    $doc_obj->set_utf8_metadata_element ($topsection, "gsdlthistype", "Hierarchy");
     777    }
     778
     779    $doc_obj->add_metadata ($topsection, "Source", $file);
     780    if ($self->{'headerpage'}) {
     781    $doc_obj->add_text($topsection, &gsprintf::lookup_string("{BasPlug.dummy_text}"));
     782    }
     783
     784}
     785
     786sub close_document {
     787    my $self = shift(@_);
     788    my $doc_obj = $self->{'doc_obj'};
     789       
     790    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
     791    $doc_obj->add_metadata($doc_obj->get_top_section(), "FileFormat", "PagedImg");
     792   
     793    # add numpages metadata
     794    $doc_obj->set_utf8_metadata_element ($doc_obj->get_top_section(), 'NumPages', $self->{'num_pages'});
     795
     796    # add an OID
     797    $doc_obj->set_OID();
     798   
    580799}
    581800
Note: See TracChangeset for help on using the changeset viewer.