Ignore:
Timestamp:
2005-05-25T17:09:43+12:00 (19 years ago)
Author:
davidb
Message:

Code for saving documents for import.pl and export.pl was repetitive in places
and at times inconsistent. These changes bring the code more into line.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/doc.pm

    r9838 r9953  
    283283sub buffer_mets_fileSection_section_xml() {
    284284    my $self = shift(@_);
    285     my ($section) = @_;
     285    my ($section,$version) = @_;
    286286
    287287    #$section="" unless defined $section;
     
    292292 
    293293
    294     #**output fileSection by sections
     294    # output fileSection by sections
    295295    my $section_num ="1". $section;
    296  
    297    
    298     #my $filePath = $doc_Dir . '/doctxt.xml';
    299 
     296     
    300297    my $filePath = 'doctxt.xml';
    301298
    302     #**output the fileSection details
     299    my $opt_owner_id = "";
     300    if ($version eq "fedora") {
     301    $opt_owner_id = "OWNERID=\"M\"";
     302    }
     303
     304    # output the fileSection details
    303305    my $all_text = '  <mets:fileGrp ID="FILEGROUP_PRELUDE' . $section_num . '">'. "\n";
    304     $all_text .= '    <mets:file MIMETYPE="text/xml" ID="FILE'.$section_num. '">'. "\n";
     306    $all_text .= "    <mets:file MIMETYPE=\"text/xml\" ID=\"FILE$section_num\" $opt_owner_id >\n";
    305307    $all_text .= '      <mets:FLocat LOCTYPE="URL" xlink:href="file:'.$filePath.'#xpointer(/Section[';
    306308   
    307309    my $xpath = "1".$section;
    308  
    309310    $xpath =~ s/\./]\/Section[/g;
    310311   
    311312    $all_text .=  $xpath;
    312313
    313     $all_text .= ']/text())" />' . "\n";
     314    $all_text .= ']/text())" xlink:title="Hierarchical Document Structure"/>' . "\n";
    314315    $all_text .= "    </mets:file>\n";
    315316    $all_text .= "  </mets:fileGrp>\n";
     
    317318
    318319    foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
    319     $all_text .= $self->buffer_mets_fileSection_section_xml("$section.$subsection");
     320    $all_text .= $self->buffer_mets_fileSection_section_xml("$section.$subsection",$version);
    320321    }
    321322   
     
    327328sub buffer_mets_fileWhole_section_xml(){
    328329    my $self = shift(@_);
    329     my ($section) = @_;
     330    my ($section,$version,$working_dir) = @_;
    330331
    331332    my $section_ptr = $self-> _lookup_section($section);
     
    334335    my $all_text="" unless defined $all_txt;
    335336
    336     my ($dirPath)="" unless defined $dirPath;
    337337    my $fileID=0;
    338338
    339     #** output the fileSection for the whole section
    340     #*** get the sourcefile and associative file
     339    # Output the fileSection for the whole section
     340    #  => get the sourcefile and associative file
     341
     342    my $id_root = "";
     343    my $opt_owner_id = "";   
     344
     345    if ($version eq "fedora") {
     346    $opt_owner_id = "OWNERID=\"M\"";
     347    }
     348    else {
     349    $id_root = "default";
     350    }
     351
     352    if ($version ne "fedora") {
     353    $all_text .= "  <mets:fileGrp ID=\"$id_root\">\n";
     354    }
    341355
    342356    foreach my $data (@{$section_ptr->{'metadata'}}){
    343357       my $escaped_value = &_escape_text($data->[1]);
    344        if ($data->[0] eq "gsdlsourcefilename") {
    345           ($dirPath) = $escaped_value =~ m/^(.*)[\/\\][^\/\\]*$/;
    346      
    347       $all_text .= '  <mets:fileGrp ID="default">'."\n";
    348           ++$fileID;
    349           $all_text .= '    <mets:file MIMETYPE="text/xml" ID="default.'.$fileID.'">'. "\n";
     358
     359       if (($data->[0] eq "gsdlsourcefilename") && ($version ne "fedora")) {
     360          my ($dirPath) = $escaped_value =~ m/^(.*)[\/\\][^\/\\]*$/;
     361
     362          ++$fileID;     
     363          $all_text .= "    <mets:file MIMETYPE=\"text/xml\" ID=\"$id_root.$fileID\" $opt_owner_id >\n";
    350364       
    351365      $all_text .= '      <mets:FLocat LOCTYPE="URL" xlink:href="file:'.$data->[1].'" />'."\n";
     
    355369       
    356370       if ($data->[0] eq "gsdlassocfile"){
    357 
     371       
    358372       $escaped_value =~ m/^(.*?):(.*):(.*)$/;
    359373       my $assoc_file = $1;
    360374       my $mime_type  = $2;
    361375       my $assoc_dir  = $3;
    362 
    363       my $assfilePath = $dirPath . '/'. $assoc_file;
    364           ++$fileID;
    365 
    366       $all_text .= '    <mets:file MIMETYPE="'.$mime_type.'" ID="default.'.$fileID. '">'. "\n";
    367       $all_text .= '      <mets:FLocat LOCTYPE="URL" xlink:href="file:'.$assfilePath.'" />'."\n";
    368          
    369       $all_text .= "    </mets:file>\n";
     376       
     377       if ($version eq "fedora") {
     378           $id_root = $assoc_file;
     379           $id_root =~ s/\//_/g;
     380           $all_text .= "  <mets:fileGrp ID=\"$id_root\">\n";
     381       }
     382       
     383       my $assfilePath = ($assoc_dir eq "") ? $assoc_file : "$assoc_dir/$assoc_file";
     384       ++$fileID;
     385       
     386       my $mime_attr   = "MIMETYPE=\"$mime_type\"";
     387       my $xlink_title = "xlink:title=\"$assoc_file\"";
     388
     389       my $id_attr;
     390       my $xlink_href;
     391
     392       if ($version eq "fedora") {
     393           $id_attr = "ID=\"$id_root.0\"";
     394
     395           my $fedora_prefix = $ENV{'FEDORA_PREFIX'};
     396           if (!defined $fedora_prefix) {
     397           $xlink_href  = "xlink:href=\"$assfilePath\"";
     398           }
     399           else
     400           {
     401           my $gsdlhome = $ENV{'GSDLHOME'};
     402           my $gsdl_href = "$working_dir/$assfilePath";
     403
     404           $gsdl_href =~ s/^$gsdlhome(\/)?//;
     405           $gsdl_href = "/gsdl/$gsdl_href";
     406
     407           my $fserver = $ENV{'FEDORA_HOSTNAME'};
     408           my $fport = $ENV{'FEDORA_SERVER_PORT'};
     409
     410           my $fdomain = "http://$fserver:$fport";
     411           $xlink_href  = "xlink:href=\"$fdomain$gsdl_href\"";
     412           }
     413
     414           my $top_section = $self->get_top_section();
     415           my $id = $self->get_metadata_element($top_section,"Identifier");
     416       }
     417       else {
     418           $id_attr = "ID=\"$id_root.$fileID\"";
     419           $xlink_href  = "xlink:href=\"$assfilePath\"";
     420       }
     421
     422       $all_text .= "    <mets:file $mime_attr $id_attr $opt_owner_id >\n";
     423       $all_text .= "      <mets:FLocat LOCTYPE=\"URL\" $xlink_href $xlink_title />\n";
     424       
     425       $all_text .= "    </mets:file>\n";
     426       
     427       if ($version eq "fedora") {
     428           $all_text .= "  </mets:fileGrp>\n";
     429       }
     430       
    370431       }
    371     }
    372     $all_text .= "  </mets:fileGrp>\n";
    373                                            
    374                                      
     432   }
     433   
     434    if ($version ne "fedora") {
     435    $all_text .= "  </mets:fileGrp>\n";
     436    }         
     437   
    375438    $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
    376439   
     
    378441}
    379442
    380 sub buffer_mets_StruMapSection_section_xml(){
     443sub buffer_mets_StructMapSection_section_xml(){
    381444    my $self = shift(@_);
    382445    my ($section, $order_numref) = @_;
     
    389452
    390453
    391     #***output fileSection by Sections
     454    # output fileSection by Sections
    392455    my $section_num ="1". $section;
    393456    my $dmd_num = $section_num;
     
    398461    #}
    399462
    400     #**output the StruMap details
     463    #**output the StructMap details
    401464 
    402     my $all_text = '  <mets:div ID="DS'. $section_num .'" TYPE="Section" ORDER="'.$$order_numref++.'" ORDERLABEL="'. $section_num .'" LABEL="';
    403 
    404     $all_text .= $section_num . '" DMDID="DM'.$dmd_num.'">'. "\n";
     465    my $dmdid_attr = "DM$dmd_num";
     466
     467    my $all_text = "  <mets:div ID=\"DS$section_num\" TYPE=\"Section\" \n";
     468    $all_text .= '      ORDER="'.$$order_numref++.'" ORDERLABEL="'. $section_num .'" '."\n";
     469    $all_text .= "      LABEL=\"$section_num\" DMDID=\"$dmdid_attr\">\n";
    405470   
    406471    $all_text .= '    <mets:fptr FILEID="FILEGROUP_PRELUDE'.$section_num.'" />'. "\n";
     
    408473
    409474    foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
    410        $all_text .= $self->buffer_mets_StruMapSection_section_xml("$section.$subsection", $order_numref);
     475       $all_text .= $self->buffer_mets_StructMapSection_section_xml("$section.$subsection", $order_numref);
    411476    }
    412477   
     
    419484
    420485
    421 sub buffer_mets_StruMapWhole_section_xml(){
     486sub buffer_mets_StructMapWhole_section_xml(){
    422487    my $self = shift(@_);
    423488    my ($section) = @_;
     
    430495    my $order_num = 0;
    431496
    432     $all_text .= '<mets:structMap ID="All" TYPE="Whole Document" LABEL="All">'."\n";
    433497    $all_text .= '  <mets:div ID="DSAll" TYPE="Document" ORDER="'.$order_num.'" ORDERLABEL="All" LABEL="Whole Documemt" DMDID="DM1">' . "\n";
    434498 
    435 
    436     #** output the StruMapSection for the whole section
    437     #*** get the sourcefile and associative file
     499    #** output the StructMapSection for the whole section
     500    #  get the sourcefile and associative file
    438501
    439502    foreach my $data (@{$section_ptr->{'metadata'}}){
     
    460523sub buffer_mets_dmdSection_section_xml(){
    461524    my $self = shift(@_);
    462     my ($section) = @_;
     525    my ($section,$version) = @_;
    463526   
    464527    $section="" unless defined $section;
     
    467530    return "" unless defined $section_ptr;
    468531
    469     #***convert section number
     532    # convert section number
    470533    my $section_num ="1". $section;
    471534    my $dmd_num = $section_num;
     
    475538    #   $dmd_num = "0";
    476539    # }
    477     my $all_text = '<mets:dmdSec ID="DM'.$dmd_num.'" GROUPID="'.$section_num.'">'. "\n";
    478     $all_text .= '  <mets:mdWrap MDTYPE="OTHER" OTHERMDTYPE="gsdl3" ID="gsdl'.$section_num.'">'."\n";
     540
     541
     542    my $all_text = "";
     543
     544    my $label_attr = "";
     545    if ($version eq "fedora") {
     546    $all_text .= "<mets:amdSec ID=\"DC\" >\n";
     547    $all_text .= "  <mets:techMD ID=\"DC.0\">\n"; # .0 fedora version number?
     548
     549    $label_attr = "LABEL=\"Dublin Core Metadata\"";
     550    }
     551    else {
     552    print STDERR "***** Check that GROUPID in dmdSec is valid!!!\n";
     553    print STDERR "***** Check to see if <techMD> required\n";
     554    # if it isn't allowed, go back and set $mdTag = dmdSec/amdSec
     555
     556    $all_text .= "<mets:dmdSec ID=\"DM$dmd_num\" GROUPID=\"$section_num\">\n";
     557    }
     558
     559    $all_text .= "  <mets:mdWrap $label_attr MDTYPE=\"OTHER\" OTHERMDTYPE=\"gsdl3\" ID=\"gsdl$section_num\">\n";
    479560    $all_text .= "    <mets:xmlData>\n";
    480     foreach my $data (@{$section_ptr->{'metadata'}}){
    481        my $escaped_value = &_escape_text($data->[1]);
    482        $all_text .= '      <gsdl3:Metadata name="'. $data->[0].'">'. $escaped_value. "</gsdl3:Metadata>\n";
    483        if ($data->[0] eq "dc.Title") {
    484        $all_text .= '      <gsdl3:Metadata name="Title">'. $escaped_value."</gsdl3:Metadata>\n";
    485        }
     561
     562    if ($version eq "fedora") {
     563    my $dc_namespace = "";
     564    $dc_namespace .= "xmlns:dc=\"http://purl.org/dc/elements/1.1/\"";
     565    $dc_namespace .= " xmlns:oai_dc=\"http://www.openarchives.org/OAI/2.0/oai_dc/\">\n";
     566
     567    $all_text .= "  <oai_dc:dc $dc_namespace>\n";
     568
     569    $all_text .= $self->buffer_dc_section($section,"oai_dc");
     570    $all_text .= "  </oai_dc:dc>\n";
     571    }
     572    else {
     573    foreach my $data (@{$section_ptr->{'metadata'}}){
     574        my $escaped_value = &_escape_text($data->[1]);
     575        $all_text .= '      <gsdl3:Metadata name="'. $data->[0].'">'. $escaped_value. "</gsdl3:Metadata>\n";
     576        if ($data->[0] eq "dc.Title") {
     577        $all_text .= '      <gsdl3:Metadata name="Title">'. $escaped_value."</gsdl3:Metadata>\n";
     578        }
     579    }
    486580    }
    487581   
    488582    $all_text .= "    </mets:xmlData>\n";
    489583    $all_text .= "  </mets:mdWrap>\n";
    490     $all_text .= "</mets:dmdSec>\n";
     584   
     585    if ($version eq "fedora") {
     586    $all_text .= "  </mets:techMD>\n";
     587    $all_text .= "</mets:amdSec>\n";
     588    }
     589    else {
     590    $all_text .= "</mets:dmdSec>\n";   
     591    }
     592
    491593
    492594    foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
    493        $all_text .= $self->buffer_mets_dmdSection_section_xml("$section.$subsection");
     595       $all_text .= $self->buffer_mets_dmdSection_section_xml("$section.$subsection",$version);
    494596    }
    495597   
     
    506608}
    507609
    508 #*** print out DSpace dublin_core metadata section
     610#  print out DSpace dublin_core metadata section
    509611sub output_dspace_section {
    510612    my $self = shift (@_);
     
    534636}
    535637
    536 #*** print out doctxt.xml file
     638#  print out doctxt.xml file
    537639sub output_txt_section {
    538640    my $self = shift (@_);
     
    542644}
    543645
    544 #*** print out docmets.xml file
     646#  print out docmets.xml file
    545647sub output_mets_section {
    546648    my $self = shift(@_);
    547     my ($handle, $section) = @_;
    548 
    549     #***print out the dmdSection
    550     print $handle $self->buffer_mets_dmdSection_section_xml($section);
    551 
    552     #***print out the fileSection by sections
     649    my ($handle, $section, $version, $working_dir) = @_;
     650
     651    # print out the dmdSection
     652    print $handle $self->buffer_mets_dmdSection_section_xml($section, $version);
     653
    553654    print $handle "<mets:fileSec>\n";
    554     print $handle $self->buffer_mets_fileSection_section_xml($section);
    555 
    556     #***print out the whole fileSection
    557     print $handle $self->buffer_mets_fileWhole_section_xml($section);
     655    if ($version eq "fedora") {
     656    print $handle "  <mets:fileGrp ID=\"DATASTREAMS\">\n";
     657    }
     658
     659    # print out the fileSection by sections
     660    print $handle $self->buffer_mets_fileSection_section_xml($section,$version);
     661
     662    # print out the whole fileSection
     663    print $handle $self->buffer_mets_fileWhole_section_xml($section,$version,$working_dir);
     664
     665    if ($version eq "fedora") {
     666    print $handle "  </mets:fileGrp>\n";
     667    }
    558668    print $handle "</mets:fileSec>\n";
    559    
    560     #***print out the StruMapSection by sections
    561     print $handle '<mets:structMap ID="Section" TYPE="Section" LABEL="Section">' . "\n";
    562     my $order_num=0;
    563     print $handle $self->buffer_mets_StruMapSection_section_xml($section, \$order_num);
    564     print $handle "</mets:structMap>\n";
    565     print $handle $self->buffer_mets_StruMapWhole_section_xml($section);
    566     print $handle "</mets:structMap>\n";
     669 
     670    # print out the StructMapSection by sections
     671
     672    my $struct_type;
     673    if ($version eq "fedora") {
     674    $struct_type = "fedora:dsBindingMap";
     675    }
     676    else {
     677    $struct_type = "Section";
     678    }
     679
     680    if ($version ne "fedora") {
     681    print $handle "<mets:structMap ID=\"Section\" TYPE=\"$struct_type\" LABEL=\"Section\">\n";
     682    my $order_num=0;
     683    print $handle $self->buffer_mets_StructMapSection_section_xml($section, \$order_num);
     684    print $handle "</mets:structMap>\n";
     685
     686    print $handle '<mets:structMap ID="All" TYPE="Whole Document" LABEL="All">'."\n";
     687    print $handle $self->buffer_mets_StructMapWhole_section_xml($section);
     688    print $handle "</mets:structMap>\n";
     689    }
     690 
    567691}
    568692
     
    585709
    586710
    587 #*** print out dublin_core.xml file
    588 sub output_dc_section {
     711
     712# Build up dublin_core metadata.  Priority given to dc.* over ex.*
     713
     714sub buffer_dc_section {
    589715    my $self = shift(@_);
    590     my ($handle, $section, $doc_Dir) = @_;
    591    
    592     #***print out the dublin_core
     716    my ($section, $version) = @_;
     717   
     718    # build up string of dublin core metadata
    593719    $section="" unless defined $section;
    594720   
     
    614740
    615741        #$all_text .= '   <dcvalue element="'. $data->[0].'" qualifier="#####">'. $escaped_value. "</dcvalue>\n";
    616         $all_text .= '   <dcvalue element="'. $dc_element.'">'. $escaped_value. "</dcvalue>\n";
     742        if (defined $version && ($version eq "oai_dc")) {
     743        $all_text .= "   <dc:$dc_element>$escaped_value</dc:$dc_element>\n";
     744        }
     745        else {
     746        $all_text .= '   <dcvalue element="'. $dc_element.'">'. $escaped_value. "</dcvalue>\n";
     747        }
     748
    617749    }
    618750    elsif (($data->[0] =~ m/^ex\./) || ($data->[0] !~ m/\./)) {
     
    642774            my $escaped_value = $v;
    643775
    644             $all_text .= '   <dcvalue element="'. $dc_element.'">'. $escaped_value. "</dcvalue>\n";
     776            if (defined $version && ($version eq "oai_dc")) {
     777            $all_text .= "   <dc:$dc_element>$escaped_value</dc:$dc_element>\n";
     778            }
     779            else {
     780            $all_text .= '   <dcvalue element="'. $dc_element.'">'. $escaped_value. "</dcvalue>\n";
     781            }
    645782           
    646783        }
     
    654791    $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
    655792
     793    return $all_text;
     794}
     795
     796
     797# Print out dublin_core metadata
     798sub output_dc_section {
     799    my $self = shift(@_);
     800    my ($handle, $section, $version) = @_;
     801   
     802    my $all_text = $self->buffer_dc_section($section,$version);
     803
    656804    print $handle $all_text;
    657805}
     806
    658807
    659808# look up the reference to the a particular section
Note: See TracChangeset for help on using the changeset viewer.