Changeset 13172


Ignore:
Timestamp:
2006-10-27T13:41:01+13:00 (17 years ago)
Author:
kjdon
Message:

Moved all printing stuff out of doc.pm.
docprint now prints a GA representation of a doc obj - use &docprint::get_section_xml instead of $doc_obj->buffer_section_xml or $doc_obj->output_section.
Most of the code has been moved into plugouts, except for the bit thats gone to docprint.pm.

Location:
trunk/gsdl/perllib
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/doc.pm

    r13050 r13172  
    3939use File::stat;
    4040##use hashdoc;
     41use docprint;
    4142
    4243# the document type may be indexed_doc, nonindexed_doc, or
     
    221222    return $doc_type if (defined $doc_type);
    222223    return "indexed_doc";
    223 }
    224 
    225 sub _escape_text {
    226     my $self = shift (@_);
    227     my ($text) = @_;
    228     # special characters in the gml encoding
    229     $text =~ s/&/&/g; # this has to be first...
    230     $text =~ s/</&lt;/g;
    231     $text =~ s/>/&gt;/g;
    232     $text =~ s/\"/&quot;/g;
    233 
    234     return $text;
    235 }
    236 
    237 sub buffer_section_xml {
    238     my $self = shift (@_);
    239     my ($section) = @_;
    240 
    241     my $section_ptr = $self->_lookup_section ($section);
    242     return "" unless defined $section_ptr;
    243 
    244     my $all_text = "<Section>\n";
    245     $all_text .= "  <Description>\n";
    246    
    247     # output metadata
    248     foreach my $data (@{$section_ptr->{'metadata'}}) {
    249     my $escaped_value = $self->_escape_text($data->[1]);
    250     $all_text .= '    <Metadata name="' . $data->[0] . '">' . $escaped_value . "</Metadata>\n";
    251     }
    252 
    253     $all_text .= "  </Description>\n";
    254 
    255     # output the text
    256     $all_text .= "  <Content>";
    257     $all_text .= $self->_escape_text($section_ptr->{'text'});
    258     $all_text .= "</Content>\n";
    259    
    260     # output all the subsections
    261     foreach my $subsection (@{$section_ptr->{'subsection_order'}}) {
    262     $all_text .= $self->buffer_section_xml("$section.$subsection");
    263     }
    264    
    265     $all_text .=  "</Section>\n";
    266 
    267     # make sure no nasty control characters have snuck through
    268     # (XML::Parser will barf on anything it doesn't consider to be
    269     # valid UTF-8 text, including things like \c@, \cC etc.)
    270     $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
    271 
    272     return $all_text;
    273 }
    274 
    275 
    276 
    277 
    278 sub output_section {
    279     my $self = shift (@_);
    280     my ($handle, $section) = @_;
    281 
    282     print $handle $self->buffer_section_xml($section);
    283 }
    284 
    285 my $dc_set = { Title => 1,       
    286            Creator => 1,
    287            Subject => 1,
    288            Description => 1,
    289            Publisher => 1,
    290            Contributor => 1,
    291            Date => 1,
    292            Type => 1,
    293            Format => 1,
    294            Identifier => 1,
    295            Source => 1,
    296            Language => 1,
    297            Relation => 1,
    298            Coverage => 1,
    299            Rights => 1};
    300 
    301 
    302 
    303 
    304 # Build up dublin_core metadata.  Priority given to dc.* over ex.*
    305 # This method was apparently added by Jeffrey and committed by Shaoqun.
    306 # But we don't know why it was added, so not using it anymore.
    307 sub new_buffer_dc_section {
    308    
    309     my $self = shift(@_);
    310     my ($section, $version) = @_;
    311 
    312     # build up string of dublin core metadata
    313     $section="" unless defined $section;
    314    
    315     my $section_ptr=$self->_lookup_section($section);
    316     return "" unless defined $section_ptr;
    317     foreach my $data (@{$section_ptr->{'metadata'}}){
    318     my $escaped_value = $self->_escape_text($data->[1]);
    319     my $dc_element =  $data->[0];
    320    
    321     my @array = split('\.',$dc_element);
    322     my ($type,$name);
    323 
    324     if(defined $array[1])
    325     {
    326         $type = $array[0];
    327         $name = $array[1];
    328     }
    329     else
    330     {
    331         $type = "ex";
    332         $name = $array[0];
    333     }
    334    
    335     $all_text .= '   <Metadata Type="'. $type.'" Name="'.$name.'">'. $escaped_value. "</Metadata>\n";
    336     }
    337     return $all_text;
    338 }
    339 
    340 
    341 sub buffer_dc_section {
    342     my $self = shift(@_);
    343     my ($section, $version) = @_;
    344    
    345     # build up string of dublin core metadata
    346     $section="" unless defined $section;
    347    
    348     my $section_ptr=$self->_lookup_section($section);
    349     return "" unless defined $section_ptr;
    350 
    351 
    352     my $explicit_dc = {};
    353     my $explicit_ex = {};
    354 
    355     my $all_text="";
    356     foreach my $data (@{$section_ptr->{'metadata'}}){
    357     foreach my $temp (@$data)
    358     {
    359         print "($temp) ";
    360     }   
    361     print "\n";
    362     my $escaped_value = $self->_escape_text($data->[1]);
    363     if ($data->[0]=~ m/^dc\./) {
    364         $data->[0] =~ tr/[A-Z]/[a-z]/;
    365 
    366         $data->[0] =~ m/^dc\.(.*)/;
    367         my $dc_element =  $1;
    368 
    369         if (!defined $explicit_dc->{$dc_element}) {
    370         $explicit_dc->{$dc_element} = [];
    371         }
    372         push(@{$explicit_dc->{$dc_element}},$escaped_value);
    373 
    374         #$all_text .= '   <dcvalue element="'. $data->[0].'" qualifier="#####">'. $escaped_value. "</dcvalue>\n";
    375         if (defined $version && ($version eq "oai_dc")) {
    376         $all_text .= "   <dc:$dc_element>$escaped_value</dc:$dc_element>\n";
    377         }
    378         else {
    379         $all_text .= '   <dcvalue element="'. $dc_element.'">'. $escaped_value. "</dcvalue>\n";
    380         }
    381 
    382     }
    383     elsif (($data->[0] =~ m/^ex\./) || ($data->[0] !~ m/\./)) {
    384         $data->[0] =~ m/^(ex\.)?(.*)/;
    385         my $ex_element =  $2;
    386         my $lc_ex_element = lc($ex_element);
    387 
    388         if (defined $dc_set->{$ex_element}) {
    389         if (!defined $explicit_ex->{$lc_ex_element}) {
    390             $explicit_ex->{$lc_ex_element} = [];
    391         }
    392         push(@{$explicit_ex->{$lc_ex_element}},$escaped_value);
    393         }
    394     }
    395     }
    396 
    397     # go through dc_set and for any element *not* defined in explicit_dc
    398     # that do exist in explicit_ex, add it in as metadata
    399     foreach my $k ( keys %$dc_set ) {
    400     my $lc_k = lc($k);
    401 
    402     if (!defined $explicit_dc->{$lc_k}) {
    403         if (defined $explicit_ex->{$lc_k}) {
    404 
    405         foreach my $v (@{$explicit_ex->{$lc_k}}) {
    406             my $dc_element    = $lc_k;
    407             my $escaped_value = $v;
    408 
    409             if (defined $version && ($version eq "oai_dc")) {
    410             $all_text .= "   <dc:$dc_element>$escaped_value</dc:$dc_element>\n";
    411             }
    412             else {
    413             $all_text .= '   <dcvalue element="'. $dc_element.'">'. $escaped_value. "</dcvalue>\n";
    414             }
    415            
    416         }
    417         }
    418     }
    419     }
    420 
    421     if ($all_text eq "") {
    422     $all_text .= "   There is no Dublin Core metatdata in this document\n";
    423     }   
    424     $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
    425 
    426     return $all_text;
    427 }
    428 
    429 
    430 # Print out dublin_core metadata
    431 # changed back to using old method, don't know why Jeffrey changed this
    432 sub output_dc_section {
    433     my $self = shift(@_);
    434     my ($handle, $section, $version) = @_;
    435    
    436     my $all_text = $self->buffer_dc_section($section,$version);
    437     #my $all_text = $self->new_buffer_dc_section($section,$version);
    438    
    439     print $handle $all_text;
    440224}
    441225
     
    555339            print STDERR "doc::set_OID could not write to $filename\n";
    556340        } else {
    557             $self->output_section('OUTFILE', $self->get_top_section(),
    558                       undef, 1);
     341            my $doc_text = &docprint::get_section_xml($self, $self->get_top_section());
     342            print OUTFILE $doc_text;
    559343            close (OUTFILE);
    560344        }
     
    576360    if (!defined $OID)
    577361    {
    578     my $hash_text = $self->buffer_section_gml($self->get_top_section(),
    579                           undef, 1);
     362    my $hash_text = &docprint::get_section_xml($self, $self->get_top_section());
    580363    my $hash_len = length($hash_text);
    581364
     
    976759}
    977760
    978 # returns a xml element of the form <MetadataList><Metadata name="metadata-name">metadata_value</Metadata>...</MetadataList>
    979 sub get_top_metadata_list{
    980     my $self = shift (@_);
    981 
    982     my @topmetadata =$self->get_all_metadata($self->get_top_section());
    983     my $metadatalist ='<MetadataList>';
    984    
    985     foreach my $i (@topmetadata){
    986     foreach my $j (@$i){   
    987         my %metaMap = @$j;
    988         foreach my $key (keys %metaMap){
    989         $metadatalist .='<Metadata name='."\"$key\"".'>'.$self->_escape_text($metaMap{$key}).'</Metadata>'."\n";
    990         }       
    991     }   
    992     }
    993    
    994     $metadatalist .='</MetadataList>';   
    995     return $metadatalist;
    996 }
    997 
    998 
    999761# $value is optional
    1000762sub delete_metadata {
  • trunk/gsdl/perllib/plugouts/BasPlugout.pm

    r13064 r13172  
    4141
    4242my $arguments = [
    43       { 'name' => "group_size",
     43       { 'name' => "group_size",
    4444    'desc' => "{BasPlugout.group_size}",
    4545    'type' => "int",
     
    7373    'type' => "flag",
    7474    'reqd' => "no", 
    75         'hiddengli' => "no"}                   
     75        'hiddengli' => "no"},
     76       { 'name' => "debug",
     77     'desc' => "{BasPlugout.debug}",
     78     'type' => "flag",
     79     'reqd' => "no",
     80     'hiddengli' => "yes"}
    7681];
    7782
     
    483488    # copy all the associated files, add this information as metadata
    484489    # to the document
    485         print STDERR "Writing associated files to $doc_dir\n";
     490        print $outhandle "Writing associated files to $doc_dir\n";
    486491    $self->process_assoc_files ($doc_obj, $doc_dir);
    487492    }
    488493
    489494    # save this document
    490     $doc_obj->output_section('BasPlugout::GROUPPROCESS', $doc_obj->get_top_section());
     495    my $section_text = &docprint::get_section_xml($doc_obj,$doc_obj->get_top_section());
     496    print GROUPPROCESS $section_text;
    491497
    492498    $self->{'gs_count'}++;
     
    717723}
    718724
     725
    719726#the subclass should implement this method if is_group method could return 1.
    720727sub close_group_output{
     
    727734}
    728735
     736my $dc_set = { Title => 1,       
     737           Creator => 1,
     738           Subject => 1,
     739           Description => 1,
     740           Publisher => 1,
     741           Contributor => 1,
     742           Date => 1,
     743           Type => 1,
     744           Format => 1,
     745           Identifier => 1,
     746           Source => 1,
     747           Language => 1,
     748           Relation => 1,
     749           Coverage => 1,
     750           Rights => 1};
     751
     752
     753# returns an XML representation of the dublin core metadata
     754# if dc meta is not found, try ex mete
     755sub get_dc_metadata {
     756    my $self = shift(@_);
     757    my ($doc_obj, $section, $version) = @_;
     758   
     759    # build up string of dublin core metadata
     760    $section="" unless defined $section;
     761   
     762    my $section_ptr = $doc_obj->_lookup_section($section);
     763    return "" unless defined $section_ptr;
     764
     765
     766    my $explicit_dc = {};
     767    my $explicit_ex = {};
     768
     769    my $all_text="";
     770    foreach my $data (@{$section_ptr->{'metadata'}}){
     771    my $escaped_value = &docprint::escape_text($data->[1]);
     772    if ($data->[0]=~ m/^dc\./) {
     773        $data->[0] =~ tr/[A-Z]/[a-z]/;
     774
     775        $data->[0] =~ m/^dc\.(.*)/;
     776        my $dc_element =  $1;
     777
     778        if (!defined $explicit_dc->{$dc_element}) {
     779        $explicit_dc->{$dc_element} = [];
     780        }
     781        push(@{$explicit_dc->{$dc_element}},$escaped_value);
     782
     783        if (defined $version && ($version eq "oai_dc")) {
     784        $all_text .= "   <dc:$dc_element>$escaped_value</dc:$dc_element>\n";
     785        }
     786        else {
     787        # qualifier???
     788        $all_text .= '   <dcvalue element="'. $dc_element.'">'. $escaped_value. "</dcvalue>\n";
     789        }
     790
     791    }
     792    elsif (($data->[0] =~ m/^ex\./) || ($data->[0] !~ m/\./)) {
     793        $data->[0] =~ m/^(ex\.)?(.*)/;
     794        my $ex_element =  $2;
     795        my $lc_ex_element = lc($ex_element);
     796
     797        if (defined $dc_set->{$ex_element}) {
     798        if (!defined $explicit_ex->{$lc_ex_element}) {
     799            $explicit_ex->{$lc_ex_element} = [];
     800        }
     801        push(@{$explicit_ex->{$lc_ex_element}},$escaped_value);
     802        }
     803    }
     804    }
     805
     806    # go through dc_set and for any element *not* defined in explicit_dc
     807    # that do exist in explicit_ex, add it in as metadata
     808    foreach my $k ( keys %$dc_set ) {
     809    my $lc_k = lc($k);
     810
     811    if (!defined $explicit_dc->{$lc_k}) {
     812        if (defined $explicit_ex->{$lc_k}) {
     813
     814        foreach my $v (@{$explicit_ex->{$lc_k}}) {
     815            my $dc_element    = $lc_k;
     816            my $escaped_value = $v;
     817
     818            if (defined $version && ($version eq "oai_dc")) {
     819            $all_text .= "   <dc:$dc_element>$escaped_value</dc:$dc_element>\n";
     820            }
     821            else {
     822            $all_text .= '   <dcvalue element="'. $dc_element.'">'. $escaped_value. "</dcvalue>\n";
     823            }
     824           
     825        }
     826        }
     827    }
     828    }
     829
     830    if ($all_text eq "") {
     831    $all_text .= "   There is no Dublin Core metatdata in this document\n";
     832    }   
     833    $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
     834
     835    return $all_text;
     836}
     837
     838# Build up dublin_core metadata.  Priority given to dc.* over ex.*
     839# This method was apparently added by Jeffrey and committed by Shaoqun.
     840# But we don't know why it was added, so not using it anymore.
     841sub new_get_dc_metadata {
     842   
     843    my $self = shift(@_);
     844    my ($doc_obj, $section, $version) = @_;
     845
     846    # build up string of dublin core metadata
     847    $section="" unless defined $section;
     848   
     849    my $section_ptr=$doc_obj->_lookup_section($section);
     850    return "" unless defined $section_ptr;
     851
     852    my $all_text = "";
     853    foreach my $data (@{$section_ptr->{'metadata'}}){
     854    my $escaped_value = &docprint::escape_text($data->[1]);
     855    my $dc_element =  $data->[0];
     856   
     857    my @array = split('\.',$dc_element);
     858    my ($type,$name);
     859
     860    if(defined $array[1])
     861    {
     862        $type = $array[0];
     863        $name = $array[1];
     864    }
     865    else
     866    {
     867        $type = "ex";
     868        $name = $array[0];
     869    }
     870   
     871    $all_text .= '   <Metadata Type="'. $type.'" Name="'.$name.'">'. $escaped_value. "</Metadata>\n";
     872    }
     873    return $all_text;
     874}
     875
     876
    7298771;
  • trunk/gsdl/perllib/plugouts/DSpacePlugout.pm

    r12363 r13172  
    113113   
    114114    $self->output_xml_header($outhandler, "dublin_core",1);
    115     $doc_obj->output_dc_section($outhandler,$doc_obj->get_top_section);
     115
     116    my $all_text = $self->get_dc_metadata($doc_obj, $doc_obj->get_top_section());
     117    print $outhandler $all_text;
     118
    116119    $self->output_xml_footer($outhandler,"dublin_core");
    117120   
  • trunk/gsdl/perllib/plugouts/GAPlugout.pm

    r12363 r13172  
    2828use strict;
    2929no strict 'refs';
     30no strict 'subs';
    3031
    3132eval {require bytes};
    3233use util;
    3334use BasPlugout;
     35use docprint;
    3436
    3537sub BEGIN {
     
    6163    my ($doc_obj,$doc_dir) = @_;
    6264
    63     my $output_dir = $self->get_output_dir();
    64     &util::mk_all_dir ($output_dir) unless -e $output_dir;
    65 
    66     my $working_dir = &util::filename_cat ($output_dir, $doc_dir);   
    67     &util::mk_all_dir ($working_dir) unless -e $working_dir;
    68 
    69     $self->process_assoc_files ($doc_obj, $doc_dir, '');
    70    
    71     my $output_file = util::filename_cat ($working_dir, "doc.xml");
    72 
    73     $self->open_xslt_pipe($output_file, $self->{'xslt_file'});
    74 
    75     my $outhandler;
    76 
    77     if (defined $self->{'xslt_writer'}){
    78        $outhandler = $self->{'xslt_writer'};
     65    my $outhandler;
     66    if ($self->{'debug'}) {
     67    $outhandler = STDOUT;
     68    # can we do the xslt and still do debug mode?
     69    }
     70    else {
     71    my $output_dir = $self->get_output_dir();
     72    &util::mk_all_dir ($output_dir) unless -e $output_dir;
     73   
     74    my $working_dir = &util::filename_cat ($output_dir, $doc_dir);   
     75    &util::mk_all_dir ($working_dir) unless -e $working_dir;
     76   
     77    $self->process_assoc_files ($doc_obj, $doc_dir, '');
     78   
     79    my $output_file = util::filename_cat ($working_dir, "doc.xml");
     80   
     81    $self->open_xslt_pipe($output_file, $self->{'xslt_file'});
     82   
     83   
     84    if (defined $self->{'xslt_writer'}){
     85        $outhandler = $self->{'xslt_writer'};
     86    }
     87    else{
     88        $outhandler = $self->get_output_handler($output_file);
     89    }
    7990    }
    80     else{
    81        $outhandler = $self->get_output_handler($output_file);
    82      }
    83    
     91   
    8492    $self->output_xml_header($outhandler,"Archive");
    85     $doc_obj->output_section($outhandler,$doc_obj->get_top_section());
     93    my $section_output = &docprint::get_section_xml($doc_obj, $doc_obj->get_top_section());
     94    print $outhandler $section_output;
    8695    $self->output_xml_footer($outhandler,"Archive");
    8796
    88     if (defined $self->{'xslt_writer'}){     
    89     $self->close_xslt_pipe();
     97    if (!$self->{'debug'}) {
     98    if (defined $self->{'xslt_writer'}){     
     99        $self->close_xslt_pipe();
     100    }
     101    else {
     102        close($outhandler);
     103    }
     104   
     105    $self->{'short_doc_file'} = util::filename_cat ($doc_dir, "doc.xml"); 
     106   
     107    $self->store_output_info_reference($doc_obj);
    90108    }
    91     else{
    92     close($outhandler);
    93     }
     109}
    94110
    95   $self->{'short_doc_file'} = util::filename_cat ($doc_dir, "doc.xml"); 
    96      
    97   $self->store_output_info_reference($doc_obj);
    98 }
     111
    99112
    1001131;
  • trunk/gsdl/perllib/plugouts/MARCXMLPlugout.pm

    r12605 r13172  
    3232use util;
    3333use BasPlugout;
     34use docprint; # for escape_text
     35
    3436
    3537sub BEGIN {
     
    117119
    118120    $self->output_xml_header($outhandler,"MARCXML");
    119     print $outhandler $doc_obj->get_top_metadata_list();
     121    print $outhandler $self->get_top_metadata_list($doc_obj);
    120122    $self->output_xml_footer($outhandler,"MARCXML"); 
    121123    $self->close_xslt_pipe();
     
    123125}
    124126
     127# returns a xml element of the form <MetadataList><Metadata name="metadata-name">metadata_value</Metadata>...</MetadataList>
     128
     129sub get_top_metadata_list {
     130
     131    my $self = shift (@_);
     132    my ($doc_obj) = @_;
     133   
     134    my @topmetadata =$doc_obj->get_all_metadata($doc_obj->get_top_section());
     135    my $metadatalist ='<MetadataList>';
     136   
     137    foreach my $i (@topmetadata){
     138    foreach my $j (@$i){   
     139        my %metaMap = @$j;
     140        foreach my $key (keys %metaMap){
     141        $metadatalist .='<Metadata name='."\"$key\"".'>'.&docprint::escape_text($metaMap{$key}).'</Metadata>'."\n";
     142        }       
     143    }   
     144    }
     145   
     146    $metadatalist .='</MetadataList>';   
     147    return $metadatalist;
     148}
    125149
    126150
  • trunk/gsdl/perllib/plugouts/METSPlugout.pm

    r13051 r13172  
    3232use util;
    3333use BasPlugout;
     34use docprint; # for escape_text
    3435
    3536sub BEGIN {
     
    218219   
    219220    my $all_text = "<Section>\n";
    220     $all_text .= $doc_obj->_escape_text("$section_ptr->{'text'}");
     221    $all_text .= &docprint::escape_text("$section_ptr->{'text'}");
    221222   
    222223    #output all the subsections
     
    326327    $all_text .= "  <oai_dc:dc $dc_namespace>\n";
    327328
    328     $all_text .= $doc_obj->buffer_dc_section($section,"oai_dc");
     329    $all_text .= $self->get_dc_metadata($doc_obj, $section,"oai_dc");
    329330    $all_text .= "  </oai_dc:dc>\n";
    330331    }
    331332    else {
    332333    foreach my $data (@{$section_ptr->{'metadata'}}){
    333         my $escaped_value = $doc_obj->_escape_text($data->[1]);
     334        my $escaped_value = &docprint::escape_text($data->[1]);
    334335        $all_text .= '      <gsdl3:Metadata name="'. $data->[0].'">'. $escaped_value. "</gsdl3:Metadata>\n";
    335336        if ($data->[0] eq "dc.Title") {
     
    420421
    421422    foreach my $data (@{$section_ptr->{'metadata'}}){
    422        my $escaped_value = $doc_obj->_escape_text($data->[1]);
     423       my $escaped_value = &docprint::escape_text($data->[1]);
    423424   
    424425       if ($data->[0] eq "gsdlsourcefilename") {
     
    513514
    514515    foreach my $data (@{$section_ptr->{'metadata'}}){
    515        my $escaped_value = $doc_obj->_escape_text($data->[1]);
     516       my $escaped_value = &docprint::escape_text($data->[1]);
    516517
    517518       if (($data->[0] eq "gsdlsourcefilename") && ($version ne "fedora")) {
Note: See TracChangeset for help on using the changeset viewer.