Changeset 13050


Ignore:
Timestamp:
2006-10-09T10:38:57+13:00 (18 years ago)
Author:
kjdon
Message:

moved all the mets outputting methods from doc.pm into METSPlugout. deleted output_dspace_section method from doc.pm cos it wasn't being used by anything. _escape_text is now a class method - so other classes can use it

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/doc.pm

    r13045 r13050  
    224224
    225225sub _escape_text {
     226    my $self = shift (@_);
    226227    my ($text) = @_;
    227228    # special characters in the gml encoding
     
    246247    # output metadata
    247248    foreach my $data (@{$section_ptr->{'metadata'}}) {
    248     my $escaped_value = &_escape_text($data->[1]);
     249    my $escaped_value = $self->_escape_text($data->[1]);
    249250    $all_text .= '    <Metadata name="' . $data->[0] . '">' . $escaped_value . "</Metadata>\n";
    250251    }
     
    254255    # output the text
    255256    $all_text .= "  <Content>";
    256     $all_text .= &_escape_text($section_ptr->{'text'});
     257    $all_text .= $self->_escape_text($section_ptr->{'text'});
    257258    $all_text .= "</Content>\n";
    258259   
     
    272273}
    273274
    274 sub buffer_txt_section_xml {
    275     my $self = shift(@_);
    276     my ($section) = @_;
    277  
    278     my $section_ptr = $self->_lookup_section ($section);
    279    
    280     return "" unless defined $section_ptr;
    281    
    282     my $all_text = "<Section>\n";
    283    
    284     ##output the text
    285     #$all_text .= "   <Content>";
    286     $all_text .= &_escape_text($section_ptr->{'text'});
    287     #$all_text .= "   </Content>\n";
    288    
    289    
    290     #output all the subsections
    291     foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
    292        $all_text .= $self->buffer_txt_section_xml("$section.$subsection");
    293      }
    294 
    295      $all_text .= "</Section>\n";
    296 
    297      
    298      $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
    299      return $all_text;
    300 }
    301 
    302 sub buffer_mets_fileSection_section_xml() {
    303     my $self = shift(@_);
    304     my ($section,$version) = @_;
    305 
    306     #$section="" unless defined $section;
    307    
    308 
    309     my $section_ptr=$self->_lookup_section($section);
    310     return "" unless defined $section_ptr;
    311  
    312 
    313     # output fileSection by sections
    314     my $section_num ="1". $section;
    315      
    316     my $filePath = 'doctxt.xml';
    317 
    318     my $opt_owner_id = "";
    319     if ($version eq "fedora") {
    320     $opt_owner_id = "OWNERID=\"M\"";
    321     }
    322 
    323     # output the fileSection details
    324     my $all_text = '  <mets:fileGrp ID="FILEGROUP_PRELUDE' . $section_num . '">'. "\n";
    325     $all_text .= "    <mets:file MIMETYPE=\"text/xml\" ID=\"FILE$section_num\" $opt_owner_id >\n";
    326     $all_text .= '      <mets:FLocat LOCTYPE="URL" xlink:href="file:'.$filePath.'#xpointer(/Section[';
    327    
    328     my $xpath = "1".$section;
    329     $xpath =~ s/\./]\/Section[/g;
    330    
    331     $all_text .=  $xpath;
    332 
    333     $all_text .= ']/text())" xlink:title="Hierarchical Document Structure"/>' . "\n";
    334     $all_text .= "    </mets:file>\n";
    335     $all_text .= "  </mets:fileGrp>\n";
    336 
    337 
    338     foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
    339     $all_text .= $self->buffer_mets_fileSection_section_xml("$section.$subsection",$version);
    340     }
    341    
    342     $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
    343 
    344     return $all_text;
    345 }
    346 
    347 sub buffer_mets_fileWhole_section_xml(){
    348     my $self = shift(@_);
    349     my ($section,$version,$working_dir) = @_;
    350 
    351     my $section_ptr = $self-> _lookup_section($section);
    352     return "" unless defined $section_ptr;
    353    
    354     my $all_text="" unless defined $all_txt;
    355 
    356     my $fileID=0;
    357 
    358     # Output the fileSection for the whole section
    359     #  => get the sourcefile and associative file
    360 
    361     my $id_root = "";
    362     my $opt_owner_id = "";   
    363 
    364     if ($version eq "fedora") {
    365     $opt_owner_id = "OWNERID=\"M\"";
    366     }
    367     else {
    368     $id_root = "default";
    369     }
    370 
    371     if ($version ne "fedora") {
    372     $all_text .= "  <mets:fileGrp ID=\"$id_root\">\n";
    373     }
    374 
    375     foreach my $data (@{$section_ptr->{'metadata'}}){
    376        my $escaped_value = &_escape_text($data->[1]);
    377 
    378        if (($data->[0] eq "gsdlsourcefilename") && ($version ne "fedora")) {
    379           my ($dirPath) = $escaped_value =~ m/^(.*)[\/\\][^\/\\]*$/;
    380 
    381           ++$fileID;     
    382           $all_text .= "    <mets:file MIMETYPE=\"text/xml\" ID=\"$id_root.$fileID\" $opt_owner_id >\n";
    383        
    384       $all_text .= '      <mets:FLocat LOCTYPE="URL" xlink:href="file:'.$data->[1].'" />'."\n";
    385      
    386           $all_text .= "    </mets:file>\n";
    387        }
    388        
    389        if ($data->[0] eq "gsdlassocfile"){
    390        
    391        $escaped_value =~ m/^(.*?):(.*):(.*)$/;
    392        my $assoc_file = $1;
    393        my $mime_type  = $2;
    394        my $assoc_dir  = $3;
    395        
    396        if ($version eq "fedora") {
    397            $id_root = $assoc_file;
    398            $id_root =~ s/\//_/g;
    399            $all_text .= "  <mets:fileGrp ID=\"$id_root\">\n";
    400        }
    401        
    402        my $assfilePath = ($assoc_dir eq "") ? $assoc_file : "$assoc_dir/$assoc_file";
    403        ++$fileID;
    404        
    405        my $mime_attr   = "MIMETYPE=\"$mime_type\"";
    406        my $xlink_title = "xlink:title=\"$assoc_file\"";
    407 
    408        my $id_attr;
    409        my $xlink_href;
    410 
    411        if ($version eq "fedora") {
    412            $id_attr = "ID=\"$id_root.0\"";
    413 
    414            my $fedora_prefix = $ENV{'FEDORA_PREFIX'};
    415            if (!defined $fedora_prefix) {
    416            $xlink_href  = "xlink:href=\"$assfilePath\"";
    417            }
    418            else
    419            {
    420            my $gsdlhome = $ENV{'GSDLHOME'};
    421            my $gsdl_href = "$working_dir/$assfilePath";
    422 
    423            $gsdl_href =~ s/^$gsdlhome(\/)?//;
    424            $gsdl_href = "/gsdl/$gsdl_href";
    425 
    426            my $fserver = $ENV{'FEDORA_HOSTNAME'};
    427            my $fport = $ENV{'FEDORA_SERVER_PORT'};
    428 
    429            my $fdomain = "http://$fserver:$fport";
    430            $xlink_href  = "xlink:href=\"$fdomain$gsdl_href\"";
    431            }
    432 
    433            my $top_section = $self->get_top_section();
    434            my $id = $self->get_metadata_element($top_section,"Identifier");
    435        }
    436        else {
    437            $id_attr = "ID=\"$id_root.$fileID\"";
    438            $xlink_href  = "xlink:href=\"$assfilePath\"";
    439        }
    440 
    441        $all_text .= "    <mets:file $mime_attr $id_attr $opt_owner_id >\n";
    442        $all_text .= "      <mets:FLocat LOCTYPE=\"URL\" $xlink_href $xlink_title />\n";
    443        
    444        $all_text .= "    </mets:file>\n";
    445        
    446        if ($version eq "fedora") {
    447            $all_text .= "  </mets:fileGrp>\n";
    448        }
    449        
    450        }
    451    }
    452    
    453     if ($version ne "fedora") {
    454     $all_text .= "  </mets:fileGrp>\n";
    455     }         
    456    
    457     $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
    458    
    459     return $all_text;
    460 }
    461 
    462 sub buffer_mets_StructMapSection_section_xml(){
    463     my $self = shift(@_);
    464     my ($section, $order_numref) = @_;
    465 
    466     $section="" unless defined $section;
    467    
    468    
    469     my $section_ptr=$self->_lookup_section($section);
    470     return "" unless defined $section_ptr;
    471 
    472 
    473     # output fileSection by Sections
    474     my $section_num ="1". $section;
    475     my $dmd_num = $section_num;
    476 
    477     ##**output the dmdSection details 
    478     #if ($section_num eq "1") {
    479     #   $dmd_num = "0";
    480     #}
    481 
    482     #**output the StructMap details
    483  
    484     my $dmdid_attr = "DM$dmd_num";
    485 
    486     my $all_text = "  <mets:div ID=\"DS$section_num\" TYPE=\"Section\" \n";
    487     $all_text .= '      ORDER="'.$$order_numref++.'" ORDERLABEL="'. $section_num .'" '."\n";
    488     $all_text .= "      LABEL=\"$section_num\" DMDID=\"$dmdid_attr\">\n";
    489    
    490     $all_text .= '    <mets:fptr FILEID="FILEGROUP_PRELUDE'.$section_num.'" />'. "\n";
    491 
    492 
    493     foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
    494        $all_text .= $self->buffer_mets_StructMapSection_section_xml("$section.$subsection", $order_numref);
    495     }
    496    
    497     $all_text .= "  </mets:div>\n";
    498 
    499     $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
    500 
    501     return $all_text;
    502 }
    503 
    504 
    505 sub buffer_mets_StructMapWhole_section_xml(){
    506     my $self = shift(@_);
    507     my ($section) = @_;
    508    
    509     my $section_ptr = $self-> _lookup_section($section);
    510     return "" unless defined $section_ptr;
    511    
    512     my $all_text="" unless defined $all_txt;   
    513     my $fileID=0;
    514     my $order_num = 0;
    515 
    516     $all_text .= '  <mets:div ID="DSAll" TYPE="Document" ORDER="'.$order_num.'" ORDERLABEL="All" LABEL="Whole Documemt" DMDID="DM1">' . "\n";
    517  
    518     #** output the StructMapSection for the whole section
    519     #  get the sourcefile and associative file
    520 
    521     foreach my $data (@{$section_ptr->{'metadata'}}){
    522        my $escaped_value = &_escape_text($data->[1]);
    523    
    524        if ($data->[0] eq "gsdlsourcefilename") {
    525           ++$fileID;
    526       $all_text .= '    <mets:fptr FILEID="default.'.$fileID.'" />'."\n";
    527        }
    528        
    529        if ($data->[0] eq "gsdlassocfile"){
    530           ++$fileID;
    531       $all_text .= '    <mets:fptr FILEID="default.'.$fileID. '" />'. "\n";
    532        }
    533     }
    534     $all_text .= "  </mets:div>\n";
    535    
    536     $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
    537    
    538     return $all_text;
    539 }
    540 
    541 
    542 sub buffer_mets_dmdSection_section_xml(){
    543     my $self = shift(@_);
    544     my ($section,$version) = @_;
    545    
    546     $section="" unless defined $section;
    547    
    548     my $section_ptr=$self->_lookup_section($section);
    549     return "" unless defined $section_ptr;
    550 
    551     # convert section number
    552     my $section_num ="1". $section;
    553     my $dmd_num = $section_num;
    554 
    555     # #**output the dmdSection details 
    556     # if ($section_num eq "1") {
    557     #   $dmd_num = "0";
    558     # }
    559 
    560 
    561     my $all_text = "";
    562 
    563     my $label_attr = "";
    564     if ($version eq "fedora") {
    565     $all_text .= "<mets:amdSec ID=\"DC\" >\n";
    566     $all_text .= "  <mets:techMD ID=\"DC.0\">\n"; # .0 fedora version number?
    567 
    568     $label_attr = "LABEL=\"Dublin Core Metadata\"";
    569     }
    570     else {
    571     # TODO::
    572     #print STDERR "***** Check that GROUPID in dmdSec is valid!!!\n";
    573     #print STDERR "***** Check to see if <techMD> required\n";
    574     # if it isn't allowed, go back and set $mdTag = dmdSec/amdSec
    575 
    576     $all_text .= "<mets:dmdSec ID=\"DM$dmd_num\" GROUPID=\"$section_num\">\n";
    577     }
    578 
    579     $all_text .= "  <mets:mdWrap $label_attr MDTYPE=\"OTHER\" OTHERMDTYPE=\"gsdl3\" ID=\"gsdl$section_num\">\n";
    580     $all_text .= "    <mets:xmlData>\n";
    581 
    582     if ($version eq "fedora") {
    583     my $dc_namespace = "";
    584     $dc_namespace .= "xmlns:dc=\"http://purl.org/dc/elements/1.1/\"";
    585     $dc_namespace .= " xmlns:oai_dc=\"http://www.openarchives.org/OAI/2.0/oai_dc/\">\n";
    586 
    587     $all_text .= "  <oai_dc:dc $dc_namespace>\n";
    588 
    589     $all_text .= $self->buffer_dc_section($section,"oai_dc");
    590     $all_text .= "  </oai_dc:dc>\n";
    591     }
    592     else {
    593     foreach my $data (@{$section_ptr->{'metadata'}}){
    594         my $escaped_value = &_escape_text($data->[1]);
    595         $all_text .= '      <gsdl3:Metadata name="'. $data->[0].'">'. $escaped_value. "</gsdl3:Metadata>\n";
    596         if ($data->[0] eq "dc.Title") {
    597         $all_text .= '      <gsdl3:Metadata name="Title">'. $escaped_value."</gsdl3:Metadata>\n";
    598         }
    599     }
    600     }
    601    
    602     $all_text .= "    </mets:xmlData>\n";
    603     $all_text .= "  </mets:mdWrap>\n";
    604    
    605     if ($version eq "fedora") {
    606     $all_text .= "  </mets:techMD>\n";
    607     $all_text .= "</mets:amdSec>\n";
    608     }
    609     else {
    610     $all_text .= "</mets:dmdSec>\n";   
    611     }
    612 
    613 
    614     foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
    615        $all_text .= $self->buffer_mets_dmdSection_section_xml("$section.$subsection",$version);
    616     }
    617    
    618     $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
    619 
    620     return $all_text;
    621 }
     275
     276
    622277
    623278sub output_section {
     
    626281
    627282    print $handle $self->buffer_section_xml($section);
    628 }
    629 
    630 #  print out DSpace dublin_core metadata section
    631 sub output_dspace_section {
    632     my $self = shift (@_);
    633     my ($handle, $section) = @_;
    634 
    635     my $section_ptr = $self->_lookup_section ($section);
    636     return "" unless defined $section_ptr;
    637 
    638     my $all_text = "<Section>\n";
    639     $all_text .= "  <Description>\n";
    640    
    641     # output metadata
    642     foreach my $data (@{$section_ptr->{'metadata'}}) {
    643     my $escaped_value = &_escape_text($data->[1]);
    644     $all_text .= '    <Metadata name="' . $data->[0] . '">' . $escaped_value . "</Metadata>\n";
    645     }
    646 
    647     $all_text .= "  </Description>\n";
    648     $all_text .=  "</Section>\n";
    649 
    650     # make sure no nasty control characters have snuck through
    651     # (XML::Parser will barf on anything it doesn't consider to be
    652     # valid UTF-8 text, including things like \c@, \cC etc.)
    653     $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
    654 
    655     return $all_text;
    656 }
    657 
    658 #  print out doctxt.xml file
    659 sub output_txt_section {
    660     my $self = shift (@_);
    661     my ($handle, $section) = @_;
    662 
    663     print $handle $self->buffer_txt_section_xml($section);
    664 }
    665 
    666 #  print out docmets.xml file
    667 sub output_mets_section {
    668     my $self = shift(@_);
    669     my ($handle, $section, $version, $working_dir) = @_;
    670 
    671     # print out the dmdSection
    672     print $handle $self->buffer_mets_dmdSection_section_xml($section, $version);
    673 
    674     print $handle "<mets:fileSec>\n";
    675     if ($version eq "fedora") {
    676     print $handle "  <mets:fileGrp ID=\"DATASTREAMS\">\n";
    677     }
    678 
    679     # print out the fileSection by sections
    680     print $handle $self->buffer_mets_fileSection_section_xml($section,$version);
    681 
    682     # print out the whole fileSection
    683     print $handle $self->buffer_mets_fileWhole_section_xml($section,$version,$working_dir);
    684 
    685     if ($version eq "fedora") {
    686     print $handle "  </mets:fileGrp>\n";
    687     }
    688     print $handle "</mets:fileSec>\n";
    689  
    690     # print out the StructMapSection by sections
    691 
    692     my $struct_type;
    693     if ($version eq "fedora") {
    694     $struct_type = "fedora:dsBindingMap";
    695     }
    696     else {
    697     $struct_type = "Section";
    698     }
    699 
    700     if ($version ne "fedora") {
    701     print $handle "<mets:structMap ID=\"Section\" TYPE=\"$struct_type\" LABEL=\"Section\">\n";
    702     my $order_num=0;
    703     print $handle $self->buffer_mets_StructMapSection_section_xml($section, \$order_num);
    704     print $handle "</mets:structMap>\n";
    705 
    706     print $handle '<mets:structMap ID="All" TYPE="Whole Document" LABEL="All">'."\n";
    707     print $handle $self->buffer_mets_StructMapWhole_section_xml($section);
    708     print $handle "</mets:structMap>\n";
    709     }
    710  
    711283}
    712284
     
    744316    return "" unless defined $section_ptr;
    745317    foreach my $data (@{$section_ptr->{'metadata'}}){
    746     my $escaped_value = &_escape_text($data->[1]);
     318    my $escaped_value = $self->_escape_text($data->[1]);
    747319    my $dc_element =  $data->[0];
    748320   
     
    788360    }   
    789361    print "\n";
    790     my $escaped_value = &_escape_text($data->[1]);
     362    my $escaped_value = $self->_escape_text($data->[1]);
    791363    if ($data->[0]=~ m/^dc\./) {
    792364        $data->[0] =~ tr/[A-Z]/[a-z]/;
     
    1415987        my %metaMap = @$j;
    1416988        foreach my $key (keys %metaMap){
    1417         $metadatalist .='<Metadata name='."\"$key\"".'>'.&_escape_text($metaMap{$key}).'</Metadata>'."\n";
     989        $metadatalist .='<Metadata name='."\"$key\"".'>'.$self->_escape_text($metaMap{$key}).'</Metadata>'."\n";
    1418990        }       
    1419991    }   
Note: See TracChangeset for help on using the changeset viewer.