Ignore:
Timestamp:
2006-10-27T13:41:01+13:00 (18 years ago)
Author:
kjdon
Message:

Moved all printing stuff out of doc.pm.
docprint now prints a GA representation of a doc obj - use &docprint::get_section_xml instead of $doc_obj->buffer_section_xml or $doc_obj->output_section.
Most of the code has been moved into plugouts, except for the bit thats gone to docprint.pm.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/doc.pm

    r13050 r13172  
    3939use File::stat;
    4040##use hashdoc;
     41use docprint;
    4142
    4243# the document type may be indexed_doc, nonindexed_doc, or
     
    221222    return $doc_type if (defined $doc_type);
    222223    return "indexed_doc";
    223 }
    224 
    225 sub _escape_text {
    226     my $self = shift (@_);
    227     my ($text) = @_;
    228     # special characters in the gml encoding
    229     $text =~ s/&/&/g; # this has to be first...
    230     $text =~ s/</&lt;/g;
    231     $text =~ s/>/&gt;/g;
    232     $text =~ s/\"/&quot;/g;
    233 
    234     return $text;
    235 }
    236 
    237 sub buffer_section_xml {
    238     my $self = shift (@_);
    239     my ($section) = @_;
    240 
    241     my $section_ptr = $self->_lookup_section ($section);
    242     return "" unless defined $section_ptr;
    243 
    244     my $all_text = "<Section>\n";
    245     $all_text .= "  <Description>\n";
    246    
    247     # output metadata
    248     foreach my $data (@{$section_ptr->{'metadata'}}) {
    249     my $escaped_value = $self->_escape_text($data->[1]);
    250     $all_text .= '    <Metadata name="' . $data->[0] . '">' . $escaped_value . "</Metadata>\n";
    251     }
    252 
    253     $all_text .= "  </Description>\n";
    254 
    255     # output the text
    256     $all_text .= "  <Content>";
    257     $all_text .= $self->_escape_text($section_ptr->{'text'});
    258     $all_text .= "</Content>\n";
    259    
    260     # output all the subsections
    261     foreach my $subsection (@{$section_ptr->{'subsection_order'}}) {
    262     $all_text .= $self->buffer_section_xml("$section.$subsection");
    263     }
    264    
    265     $all_text .=  "</Section>\n";
    266 
    267     # make sure no nasty control characters have snuck through
    268     # (XML::Parser will barf on anything it doesn't consider to be
    269     # valid UTF-8 text, including things like \c@, \cC etc.)
    270     $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
    271 
    272     return $all_text;
    273 }
    274 
    275 
    276 
    277 
    278 sub output_section {
    279     my $self = shift (@_);
    280     my ($handle, $section) = @_;
    281 
    282     print $handle $self->buffer_section_xml($section);
    283 }
    284 
    285 my $dc_set = { Title => 1,       
    286            Creator => 1,
    287            Subject => 1,
    288            Description => 1,
    289            Publisher => 1,
    290            Contributor => 1,
    291            Date => 1,
    292            Type => 1,
    293            Format => 1,
    294            Identifier => 1,
    295            Source => 1,
    296            Language => 1,
    297            Relation => 1,
    298            Coverage => 1,
    299            Rights => 1};
    300 
    301 
    302 
    303 
    304 # Build up dublin_core metadata.  Priority given to dc.* over ex.*
    305 # This method was apparently added by Jeffrey and committed by Shaoqun.
    306 # But we don't know why it was added, so not using it anymore.
    307 sub new_buffer_dc_section {
    308    
    309     my $self = shift(@_);
    310     my ($section, $version) = @_;
    311 
    312     # build up string of dublin core metadata
    313     $section="" unless defined $section;
    314    
    315     my $section_ptr=$self->_lookup_section($section);
    316     return "" unless defined $section_ptr;
    317     foreach my $data (@{$section_ptr->{'metadata'}}){
    318     my $escaped_value = $self->_escape_text($data->[1]);
    319     my $dc_element =  $data->[0];
    320    
    321     my @array = split('\.',$dc_element);
    322     my ($type,$name);
    323 
    324     if(defined $array[1])
    325     {
    326         $type = $array[0];
    327         $name = $array[1];
    328     }
    329     else
    330     {
    331         $type = "ex";
    332         $name = $array[0];
    333     }
    334    
    335     $all_text .= '   <Metadata Type="'. $type.'" Name="'.$name.'">'. $escaped_value. "</Metadata>\n";
    336     }
    337     return $all_text;
    338 }
    339 
    340 
    341 sub buffer_dc_section {
    342     my $self = shift(@_);
    343     my ($section, $version) = @_;
    344    
    345     # build up string of dublin core metadata
    346     $section="" unless defined $section;
    347    
    348     my $section_ptr=$self->_lookup_section($section);
    349     return "" unless defined $section_ptr;
    350 
    351 
    352     my $explicit_dc = {};
    353     my $explicit_ex = {};
    354 
    355     my $all_text="";
    356     foreach my $data (@{$section_ptr->{'metadata'}}){
    357     foreach my $temp (@$data)
    358     {
    359         print "($temp) ";
    360     }   
    361     print "\n";
    362     my $escaped_value = $self->_escape_text($data->[1]);
    363     if ($data->[0]=~ m/^dc\./) {
    364         $data->[0] =~ tr/[A-Z]/[a-z]/;
    365 
    366         $data->[0] =~ m/^dc\.(.*)/;
    367         my $dc_element =  $1;
    368 
    369         if (!defined $explicit_dc->{$dc_element}) {
    370         $explicit_dc->{$dc_element} = [];
    371         }
    372         push(@{$explicit_dc->{$dc_element}},$escaped_value);
    373 
    374         #$all_text .= '   <dcvalue element="'. $data->[0].'" qualifier="#####">'. $escaped_value. "</dcvalue>\n";
    375         if (defined $version && ($version eq "oai_dc")) {
    376         $all_text .= "   <dc:$dc_element>$escaped_value</dc:$dc_element>\n";
    377         }
    378         else {
    379         $all_text .= '   <dcvalue element="'. $dc_element.'">'. $escaped_value. "</dcvalue>\n";
    380         }
    381 
    382     }
    383     elsif (($data->[0] =~ m/^ex\./) || ($data->[0] !~ m/\./)) {
    384         $data->[0] =~ m/^(ex\.)?(.*)/;
    385         my $ex_element =  $2;
    386         my $lc_ex_element = lc($ex_element);
    387 
    388         if (defined $dc_set->{$ex_element}) {
    389         if (!defined $explicit_ex->{$lc_ex_element}) {
    390             $explicit_ex->{$lc_ex_element} = [];
    391         }
    392         push(@{$explicit_ex->{$lc_ex_element}},$escaped_value);
    393         }
    394     }
    395     }
    396 
    397     # go through dc_set and for any element *not* defined in explicit_dc
    398     # that do exist in explicit_ex, add it in as metadata
    399     foreach my $k ( keys %$dc_set ) {
    400     my $lc_k = lc($k);
    401 
    402     if (!defined $explicit_dc->{$lc_k}) {
    403         if (defined $explicit_ex->{$lc_k}) {
    404 
    405         foreach my $v (@{$explicit_ex->{$lc_k}}) {
    406             my $dc_element    = $lc_k;
    407             my $escaped_value = $v;
    408 
    409             if (defined $version && ($version eq "oai_dc")) {
    410             $all_text .= "   <dc:$dc_element>$escaped_value</dc:$dc_element>\n";
    411             }
    412             else {
    413             $all_text .= '   <dcvalue element="'. $dc_element.'">'. $escaped_value. "</dcvalue>\n";
    414             }
    415            
    416         }
    417         }
    418     }
    419     }
    420 
    421     if ($all_text eq "") {
    422     $all_text .= "   There is no Dublin Core metatdata in this document\n";
    423     }   
    424     $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
    425 
    426     return $all_text;
    427 }
    428 
    429 
    430 # Print out dublin_core metadata
    431 # changed back to using old method, don't know why Jeffrey changed this
    432 sub output_dc_section {
    433     my $self = shift(@_);
    434     my ($handle, $section, $version) = @_;
    435    
    436     my $all_text = $self->buffer_dc_section($section,$version);
    437     #my $all_text = $self->new_buffer_dc_section($section,$version);
    438    
    439     print $handle $all_text;
    440224}
    441225
     
    555339            print STDERR "doc::set_OID could not write to $filename\n";
    556340        } else {
    557             $self->output_section('OUTFILE', $self->get_top_section(),
    558                       undef, 1);
     341            my $doc_text = &docprint::get_section_xml($self, $self->get_top_section());
     342            print OUTFILE $doc_text;
    559343            close (OUTFILE);
    560344        }
     
    576360    if (!defined $OID)
    577361    {
    578     my $hash_text = $self->buffer_section_gml($self->get_top_section(),
    579                           undef, 1);
     362    my $hash_text = &docprint::get_section_xml($self, $self->get_top_section());
    580363    my $hash_len = length($hash_text);
    581364
     
    976759}
    977760
    978 # returns a xml element of the form <MetadataList><Metadata name="metadata-name">metadata_value</Metadata>...</MetadataList>
    979 sub get_top_metadata_list{
    980     my $self = shift (@_);
    981 
    982     my @topmetadata =$self->get_all_metadata($self->get_top_section());
    983     my $metadatalist ='<MetadataList>';
    984    
    985     foreach my $i (@topmetadata){
    986     foreach my $j (@$i){   
    987         my %metaMap = @$j;
    988         foreach my $key (keys %metaMap){
    989         $metadatalist .='<Metadata name='."\"$key\"".'>'.$self->_escape_text($metaMap{$key}).'</Metadata>'."\n";
    990         }       
    991     }   
    992     }
    993    
    994     $metadatalist .='</MetadataList>';   
    995     return $metadatalist;
    996 }
    997 
    998 
    999761# $value is optional
    1000762sub delete_metadata {
Note: See TracChangeset for help on using the changeset viewer.