Changeset 8517


Ignore:
Timestamp:
2004-11-11T14:31:46+13:00 (19 years ago)
Author:
chi
Message:

Add and modify methods to handle exporting GS collections to the "METS" and "DSpace" formats.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/docsave.pm

    r8094 r8517  
    3535
    3636use arcinfo;
     37use expinfo;
    3738use docproc;
    3839use util;
     
    4445
    4546sub new {
    46     my ($class, $collection, $archive_info, $verbosity,
    47     $gzip, $groupsize, $outhandle) = @_;
     47    my ($class, $collection, $info, $verbosity,
     48    $gzip, $groupsize, $outhandle, $service, $saveas) = @_;
    4849    my $self = new docproc ();
    49    
    5050   
    5151    $groupsize=1 unless defined $groupsize;
    5252    $self->{'collection'} = $collection;
    53     $self->{'archive_info'} = $archive_info;
     53    if ($service eq "import"){
     54    $self->{'archive_info'} = $info;
     55    } elsif ($service eq "export"){
     56    $self->{'export_info'} = $info;
     57    } else {
     58    return;
     59    }
     60
    5461    $self->{'verbosity'} = $verbosity;
    5562    $self->{'gzip'} = $gzip;
     
    6067    $self->{'outhandle'} = 'STDERR';
    6168    $self->{'outhandle'} = $outhandle if defined $outhandle;
     69    $self->{'service'} = $service;
     70    $self->{'saveas'} = $saveas;
     71
    6272    # set a default for the archive directory
    63     $self->{'archive_dir'} = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "archives");
    64    
     73    if ($service eq "import"){
     74    $self->{'archive_dir'} = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "archives");
     75    } elsif ($service eq "export") {
     76    # set a default for the export directory
     77    $self->{'export_dir'} = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "export");
     78    } else {
     79    return;
     80    }
    6581    $self->{'sortmeta'} = undef;
    6682   
     
    7692}
    7793
     94sub setexportdir {
     95    my $self = shift (@_);
     96    my ($export_dir) = @_;
     97 
     98    &util::mk_all_dir ($export_dir) unless -e $export_dir;
     99    $self->{'export_dir'} = $export_dir;
     100}
     101
    78102sub set_sortmeta {
    79103    my $self = shift (@_);
     
    81105   
    82106    $self->{'sortmeta'} = $sortmeta;
    83 }
     107 }
    84108
    85109sub process {
     
    88112 
    89113    my $outhandle = $self->{'outhandle'};
    90    
     114    my $service = $self->{'service'} || "import";
     115
     116    # Define the SaveAs Type
     117    my $save_as = $self->{'saveas'} || "GA";
     118    my $collection = $self->{'collection'};
     119
    91120    if ($self->{'groupsize'} > 1) {
    92121    $self->group_process ($doc_obj);
     
    94123    }
    95124
     125    my $OID = $doc_obj->get_OID();
     126    $OID = "NULL" unless defined $OID;
     127
     128    # get document's directory
     129    my $doc_dir = $self->get_doc_dir ($OID);
     130
    96131    # groupsize is 1 (i.e. one document per XML file) so sortmeta
    97132    # may be used
    98    
    99     my $OID = $doc_obj->get_OID();
    100     $OID = "NULL" unless defined $OID;
    101 
    102     # get document's directory
    103     my $doc_dir = $self->get_doc_dir ($OID);
    104      
     133
     134    if ($service eq "import") {
     135    my $archive_info = $self->{'archive_info'};
     136    } elsif ($service eq "export") {
     137    my $export_info = $self->{'export_info'};
     138    } else {
     139    return;
     140    }
    105141   
    106142    # copy all the associated files, add this information as metadata
    107143    # to the document
    108     $self->process_assoc_files ($doc_obj, $doc_dir);
     144    if ($service eq "export" && $save_as eq "DSpace") {
     145    # open contents file
     146    my $doc_contents_file
     147        = &util::filename_cat ($self->{'export_dir'},$doc_dir, "contents");
    109148   
    110     my $doc_file
    111     = &util::filename_cat ($self->{'archive_dir'}, $doc_dir, "doc.xml");
    112      
    113     #***define doctxt.xml file
    114     my $doc_txt_file
    115     = &util::filename_cat ($self->{'archive_dir'}, $doc_dir,"doctxt.xml");
    116     my $working_dir
    117     =&util::filename_cat ($self->{'archive_dir'}, $doc_dir);
     149    if (!open(OUTDOC_EXPORT_CONTENTS,">$doc_contents_file")){
     150        print $outhandle "docsave::process could not write collection contents to file $doc_contents_file\n";
     151        return;
     152    }
     153    $self->process_assoc_files ($doc_obj, $doc_dir, 'docsave::OUTDOC_EXPORT_CONTENTS');
     154    } else {
     155    $self->process_assoc_files ($doc_obj, $doc_dir, '');
     156    }
    118157       
    119     #***define docmets.xmlfile
    120     my $doc_mets_file
    121     = &util::filename_cat ($self->{'archive_dir'},$doc_dir, "docmets.xml");
     158    my $doc_file;
     159    my $doc_mets_file;
     160    my $doc_txt_file;
     161    my $short_doc_file;
     162
     163    # Import collection to GS2 in GS Archive format and METS format
     164    if ($service eq "import") {
     165    my $doc_file
     166        = &util::filename_cat ($self->{'archive_dir'}, $doc_dir, "doc.xml");
     167
     168    #***define doctxt.xml file
     169    my $doc_txt_file
     170        = &util::filename_cat ($self->{'archive_dir'}, $doc_dir,"doctxt.xml");
     171
     172    my $import_working_dir
     173        =&util::filename_cat ($self->{'archive_dir'}, $doc_dir);
     174   
     175    #***define docmets.xml file
     176    my $doc_mets_file
     177        = &util::filename_cat ($self->{'archive_dir'},$doc_dir, "docmets.xml");
     178   
     179    if ($save_as eq "GA") {
     180        $short_doc_file = util::filename_cat ($doc_dir, "doc.xml");
     181    } elsif ($save_as eq "METS") {
     182        #my $short_txt_doc_file=&util::filename_cat ($doc_dir, "doctxt.xml");
     183        $short_doc_file = &util::filename_cat ($doc_dir, "docmets.xml");
     184    } else {
     185        return;
     186    }
     187   
     188    if ($save_as eq "GA") {
     189        if (!open (OUTDOC, ">$doc_file")) {
     190        print $outhandle "docsave::process could not write to file $doc_file\n";
     191        return;
     192        }
     193        # save this document
     194        $self->output_xml_header('docsave::OUTDOC');
     195        $doc_obj->output_section('docsave::OUTDOC',
     196                 $doc_obj->get_top_section());
     197        $self->output_xml_footer('docsave::OUTDOC');
     198       
     199        close OUTDOC;
     200    } elsif ($save_as eq "METS") {
     201        # save the document without metadata:doctxt.xml
     202       
     203        if (!open(OUTDOC_TXT, ">$doc_txt_file")){
     204        print $outhandle "docsave::process could not write to file $doc_txt_file\n";
     205        return;
     206        }
     207       
     208        $self->output_txt_xml_header('docsave::OUTDOC_TXT');
     209        $doc_obj->output_txt_section('docsave::OUTDOC_TXT', $doc_obj->get_top_section());
     210        #$self->output_txt_xml_footer('docsave::OUTDOC_TXT');
     211       
     212        # Convert doctxt.xml file to docmets.xml
     213        if (!open(OUTDOC_METS,">$doc_mets_file")){
     214        print $outhandle "docsave::process could not write to file $doc_mets_file\n";
     215        return;
     216        }
     217       
     218        $self->output_mets_xml_header('docsave::OUTDOC_METS', $OID);
     219        $doc_obj->output_mets_section('docsave::OUTDOC_METS',
     220                      $doc_obj->get_top_section(),
     221                      $import_working_dir);
     222        $self->output_mets_xml_footer('docsave::OUTDOC_METS');
     223       
     224        close OUTDOC_TXT;
     225        close OUTDOC_METS;
     226    } else { # save_as isn't GA or METS
     227        print $outhandle "docsave::process unrecognised saveas type, $save_as\n";
     228        return;
     229    }
     230    }
     231   
     232    ## Export the collection to METS format or DSpace Archive Format into the export directory
     233    if ($service eq "export") {
     234    my $doc_dc_file;
     235    my $doc_contents_file;
     236
     237    my $export_working_dir
     238        =&util::filename_cat ($self->{'export_dir'}, $doc_dir);
     239       
     240    if ($save_as eq "METS") {
     241        $doc_mets_file
     242        = &util::filename_cat ($self->{'export_dir'},$doc_dir, "docmets.xml");
     243       
     244        $doc_txt_file
     245        = &util::filename_cat ($self->{'export_dir'},$doc_dir, "doctxt.xml");
     246       
     247        if (!open(OUTDOC_EXPORT_TXT, ">$doc_txt_file")){
     248        print $outhandle "docsave::process could not write TXT to file $doc_txt_file\n";
     249        return;
     250        }
     251       
     252        $self->output_txt_xml_header('docsave::OUTDOC_EXPORT_TXT');
     253        $doc_obj->output_txt_section('docsave::OUTDOC_EXPORT_TXT', $doc_obj->get_top_section());
     254       
     255        if (!open(OUTDOC_EXPORT_METS,">$doc_mets_file")){
     256        print $outhandle "docsave::process could not write METS format to file $doc_mets_file\n";
     257        return;
     258        }
     259        $self->output_mets_xml_header('docsave::OUTDOC_EXPORT_METS', $OID);
     260        $doc_obj->output_mets_section('docsave::OUTDOC_EXPORT_METS',$doc_obj->get_top_section(), $export_working_dir);
     261        $self->output_mets_xml_footer('docsave::OUTDOC_EXPORT_METS');
     262   
     263        close OUTDOC_EXPORT_TXT;
     264        close OUTDOC_EXPORT_METS;
     265    } elsif ($save_as eq "DSpace") {
     266
     267        # Generate dublin_core.xml file
     268        $doc_dc_file
     269        = &util::filename_cat ($self->{'export_dir'},$doc_dir, "dublin_core.xml");
     270   
     271        if (!open(OUTDOC_EXPORT_DC,">$doc_dc_file")){
     272        print $outhandle "docsave::process could not write dublin core to file $doc_dc_file\n";
     273        return;
     274        }   
     275       
     276        $self->output_dc_xml_header('docsave::OUTDOC_EXPORT_DC', $OID);
     277        $doc_obj->output_dc_section('docsave::OUTDOC_EXPORT_DC',$doc_obj->get_top_section(), $export_working_dir);
     278        $self->output_dc_xml_footer('docsave::OUTDOC_EXPORT_DC');
     279   
     280        close OUTDOC_EXPORT_DC;
     281        close OUTDOC_EXPORT_CONTENTS;
     282    } else { # save_as isn't METS or DSpace
     283        print $outhandle "docsave::process unrecognised saveas type, $save_as\n";
     284        return;
     285    }
     286
     287    if ($save_as eq "METS") {
     288        $short_doc_file = util::filename_cat ($doc_dir, "docmets.xml");
     289    } elsif ($save_as eq "DSpace") {
     290        #my $short_txt_doc_file=&util::filename_cat ($doc_dir, "doctxt.xml");
     291        $short_doc_file=&util::filename_cat ($doc_dir, "dublin_core.xml");
     292    } else {
     293        return;
     294    }
     295   
     296    }
     297    #save for later (for close_file_output())
     298    $self->{'short_doc_file'} = $short_doc_file;   
    122299 
    123     my $short_doc_file;
    124     my $save_as = $self->{'saveas'} || "GA";
    125     if ($save_as eq "GA") {
    126     $short_doc_file = util::filename_cat ($doc_dir, "doc.xml");
    127     } elsif ($save_as eq "METS") {
    128     #my $short_txt_doc_file=&util::filename_cat ($doc_dir, "doctxt.xml");
    129     $short_doc_file=&util::filename_cat ($doc_dir, "docmets.xml");
    130     } else {
    131     return;
    132     }
    133     # save for later (for close_file_output())
    134     $self->{'short_doc_file'}=$short_doc_file;
    135 
    136     if ($save_as eq "GA") {
    137     if (!open (OUTDOC, ">$doc_file")) {
    138         print $outhandle "docsave::process could not write to file $doc_file\n";
    139         return;
    140     }
    141        
    142     # save this document
    143     $self->output_xml_header('docsave::OUTDOC');
    144     $doc_obj->output_section('docsave::OUTDOC',
    145                  $doc_obj->get_top_section());
    146     $self->output_xml_footer('docsave::OUTDOC');
    147 
    148     close OUTDOC;
    149     } elsif ($save_as eq "METS") {
    150     # save the document without metadata:doctxt.xml
    151 
    152     if (!open(OUTDOC_TXT, ">$doc_txt_file")){
    153         print $outhandle "docsave::process could not write to file $doc_mets_file\n";
    154         return;
    155     }
    156 
    157     $self->output_txt_xml_header('docsave::OUTDOC_TXT');
    158     $doc_obj->output_txt_section('docsave::OUTDOC_TXT', $doc_obj->get_top_section());
    159     #$self->output_txt_xml_footer('docsave::OUTDOC_TXT');
    160    
    161     # Convert doctxt.xml file to docmets.xml
    162     if (!open(OUTDOC_METS,">$doc_mets_file")){
    163         print $outhandle "docsave::process could not write to file $doc_mets_file\n";
    164         return;
    165     }
    166        
    167     $self->output_mets_xml_header('docsave::OUTDOC_METS', $OID);
    168     $doc_obj->output_mets_section('docsave::OUTDOC_METS',
    169                       $doc_obj->get_top_section(),
    170                       $working_dir);
    171     $self->output_mets_xml_footer('docsave::OUTDOC_METS');
    172 
    173     close OUTDOC_TXT;
    174     close OUTDOC_METS;
    175     } else { # save_as isn't GA or METS
    176     print $outhandle "docsave::process unrecognised saveas type, $save_as\n";
    177     return;
    178     }
    179 
    180300    if ($self->{'gzip'}) {
    181301    my $doc_file = $self->{'gs_filename'};
     
    190310
    191311    # do the sortmeta thing
    192     my ($metadata); if (defined ($self->{'sortmeta'})) {
     312    my ($metadata);
     313    if (defined ($self->{'sortmeta'})) {
    193314    $metadata = $doc_obj->get_metadata_element($doc_obj->get_top_section(),
    194315                           $self->{'sortmeta'});
    195316    }
    196317
    197     # store reference in the archive_info
    198     $self->{'archive_info'}->add_info($OID, $short_doc_file, $metadata);
     318    # store reference in the archive_info and export_info
     319    if ($service eq "export") {
     320    $self->{'export_info'}->add_info($OID, $short_doc_file, $metadata);
     321    } elsif ($service eq "import") {
     322    $self->{'archive_info'}->add_info($OID, $short_doc_file, $metadata);
     323    }
    199324}
    200325
     
    253378}
    254379
    255 
    256380sub get_doc_dir {
    257381    my $self = shift (@_);
    258382    my ($OID) = @_;
    259 
    260     my $doc_info = $self->{'archive_info'}->get_info($OID);
    261     my $doc_dir = "";
     383    my $doc_info;
     384    my $doc_dir;
     385    my $service = $self-> {'service'};
     386    my $working_dir;
     387    my $working_info;
     388
     389    if ($service eq "import") {
     390    $doc_info = $self->{'archive_info'}->get_info($OID);
     391    $working_dir = $self->{'archive_dir'};
     392    $working_info = $self->{'archive_info'};
     393    } elsif ($service eq "export") {
     394    $doc_info =$self->{'export_info'}->get_info($OID);
     395        $working_dir = $self->{'export_dir'};
     396    $working_info = $self->{'export_info'};
     397    } else {
     398    return;
     399    }
    262400    if (defined $doc_info && scalar(@$doc_info) >= 1) {
    263401    # this OID already has an assigned directory, use the
     
    276414        }
    277415    } while ($doc_dir_rest ne "" &&
    278          ((-d &util::filename_cat ($self->{'archive_dir'}, "$doc_dir.dir")) ||
    279           ($self->{'archive_info'}->size() >= 1024 && $doc_dir_num < 2)));
     416         ((-d &util::filename_cat ($working_dir, "$doc_dir.dir")) ||
     417          ($working_info->size() >= 1024 && $doc_dir_num < 2)));
     418
    280419    $doc_dir .= ".dir";
    281    
    282     }
    283    
    284     &util::mk_all_dir (&util::filename_cat ($self->{'archive_dir'}, $doc_dir));
    285 
     420    &util::mk_all_dir (&util::filename_cat ($working_dir, $doc_dir));
     421    }
    286422    return $doc_dir;
    287423}
    288424
    289 
    290425sub process_assoc_files {
    291426    my $self = shift (@_);
    292     my ($doc_obj, $doc_dir) = @_;
     427    my ($doc_obj, $doc_dir, $handle) = @_;
    293428
    294429    my $outhandle = $self->{'outhandle'};
    295 
     430   
    296431    my @assoc_files = ();
     432    my $filename;;
     433    my $working_dir;
     434    my $service = $self->{'service'};
     435    my $save_as = $self->{'saveas'};
     436
     437    if ($service eq "import") {
     438    $working_dir = $self->{'archive_dir'};
     439    } elsif ($service eq "export"){
     440    $working_dir = $self->{'export_dir'};
     441    } else {
     442    return;
     443    }
     444    $doc_obj->get_source_filename()=~ /\/[^\/\\]$/;
     445
     446    if ($save_as eq "DSpace") {
     447    print $handle "$1\n";
     448    $filename = &util::filename_cat($working_dir, $doc_dir, $1);
     449    &util::hard_link ($doc_obj->get_source_filename(), $filename);
     450    }
     451           
    297452    foreach my $assoc_file (@{$doc_obj->get_assoc_files()}) {
    298453    my ($dir, $afile) = $assoc_file->[1] =~ /^(.*?)([^\/\\]+)$/;
    299454    $dir = "" unless defined $dir;
     455
     456    # Store the associated file to the "contents" file
     457    if ($save_as eq "DSpace") {
     458        print $handle "$assoc_file->[1]\n";
     459    }
     460
    300461    if (-e $assoc_file->[0]) {
    301         my $filepath = &util::filename_cat($self->{'archive_dir'}, $doc_dir, $afile);
    302         &util::hard_link ($assoc_file->[0], $filepath);
     462        $filename = &util::filename_cat($working_dir, $doc_dir, $afile);
     463
     464        &util::hard_link ($assoc_file->[0], $filename);
     465       
    303466        $doc_obj->add_utf8_metadata ($doc_obj->get_top_section(),
    304467                     "gsdlassocfile",
     
    318481{
    319482    my ($self) = @_;
     483    my $service =$self->{'service'};
    320484
    321485    # make sure that the handle has been opened - it won't be if we failed
     
    331495    if (exists($self->{'saveas'}) && $self->{'saveas'} eq "METS") {
    332496    $short_doc_file=$self->{'short_doc_file'};
    333     } else { # "GA"
     497    } elsif ($self->{'saveas'} eq "GA") { # "GA"
    334498    $short_doc_file=$self->{'gs_short_filename'};
    335     }
    336 
     499    } else { # "DSpace"
     500    }
     501   
    337502    if ($self->{'gzip'}) {
    338503    my $doc_file = $self->{'gs_filename'};
     
    347512    }
    348513
    349     # store reference in the archive_info
    350     $self->{'archive_info'}->add_info($OID, $short_doc_file);
    351    
     514    # store reference in the archive_info and export_info
     515    if ($service eq "import") {
     516    $self->{'archive_info'}->add_info($OID, $short_doc_file);
     517    } elsif ($service eq "export") {
     518    $self->{'export_info'}->add_info($OID, $short_doc_file);
     519    } else {
     520    return;
     521    }
    352522    return 1;
    353523}
     
    386556    my $self = shift(@_);
    387557    my ($handle, $OID) = @_;
     558
    388559    print $handle '<?xml version="1.0" encoding="UTF-8" standalone="no"?>' . "\n";
    389560    print $handle '<!DOCTYPE Archive SYSTEM "http://greenstone.org/dtd/Archive/1.0/Archive.dtd">' . "\n";
     
    397568}
    398569
     570sub output_dc_xml_header(){
     571    my $self = shift(@_);
     572    my ($handle, $OID) = @_;
     573
     574    print $handle '<?xml version="1.0" encoding="UTF-8" standalone="no"?>' . "\n";
     575#    print $handle '<!DOCTYPE Archive SYSTEM "http://greenstone.org/dtd/Archive/1.0/Archive.dtd">' . "\n";
     576    print $handle '<dublin_core>' . "\n";
     577}
     578
     579sub output_dc_xml_footer() {
     580    my $self = shift(@_);
     581    my ($handle) = @_;
     582    print $handle '</dublin_core>' . "\n";
     583}
    3995841;
    400 
    401 
    402 
Note: See TracChangeset for help on using the changeset viewer.