Ignore:
Timestamp:
2005-05-25T17:11:20+12:00 (19 years ago)
Author:
davidb
Message:

The abilities to save documents from import.pl and export.pl are very similar,
but the code did not always reflect this. At times it was repetitive and
at others inconsistent. This has now been refactored and brought into
line.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/docsave.pm

    r9921 r9954  
    4949    my $self = new docproc ();
    5050   
    51     $groupsize=1 unless defined $groupsize;
    52     $service="import" unless defined $service;
     51    my $collectdir = $ENV{'GSDLCOLLECTDIR'};
     52
     53    $outhandle = 'STDERR' unless (defined $outhandle);
     54    $service   = "import" unless (defined $service);
     55    $saveas    = "GA" unless (defined $saveas);
     56    $groupsize = 1 unless (defined $groupsize);
    5357
    5458    $self->{'collection'} = $collection;
    55     if ($service eq "import"){
     59    if (($service eq "import") || ($service eq "unbuild")) {
    5660    $self->{'archive_info'} = $info;
     61    # set a default for the archive directory
     62    $self->{'archive_dir'} = &util::filename_cat ($collectdir, "archives");
    5763    } elsif ($service eq "export"){
    5864    $self->{'export_info'} = $info;
     65    # set a default for the export directory
     66    $self->{'export_dir'} = &util::filename_cat($collectdir, "export");
    5967    } else {
     68    print $outhandle "docsave::new Unrecongised service: $service\n";
    6069    return;
    6170    }
     
    6574    $self->{'keepimportstructure'} = 0;
    6675    $self->{'groupsize'} = $groupsize;
    67     $self->{'gs_count'} = 0;
    68 
    69     $self->{'outhandle'} = 'STDERR';
    70     $self->{'outhandle'} = $outhandle if defined $outhandle;
    71     $self->{'service'} = $service;
    72     $self->{'saveas'} = $saveas;
    73 
    74     # set a default for the archive directory
    75     if ($service eq "import"){
    76     $self->{'archive_dir'} = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "archives");
    77     } elsif ($service eq "export") {
    78     # set a default for the export directory
    79     $self->{'export_dir'} = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "export");
    80     } else {
    81     return;
    82     }
     76    $self->{'gs_count'}  = 0;
     77
     78    $self->{'outhandle'} = $outhandle;
     79    $self->{'service'}   = $service;
     80    $self->{'saveas'}    = $saveas;
     81
    8382    $self->{'sortmeta'} = undef;
    8483   
     
    102101}
    103102
     103sub getoutputdir {
     104    my $self = shift (@_);
     105
     106    my $output_dir = undef;
     107
     108    my $service = $self->{'service'};
     109
     110    if (($service eq "import") || ($service eq "unbuild")) {
     111    $output_dir = $self->{'archive_dir'};
     112    }
     113    elsif ($service eq "export") {
     114    $output_dir = $self->{'export_dir'};
     115    }
     116    else {
     117    my $outhandle = $self->{'outhandle'};
     118
     119    print $outhandle "docsave::getoutputdir did not recognise service ";
     120    print $outhandle " '$service'. No output directory set.\n";
     121    }
     122
     123    return $output_dir;
     124}
     125
     126
     127sub getoutputinfo {
     128    my $self = shift (@_);
     129
     130    my $output_info = undef;
     131
     132    my $service = $self->{'service'};
     133
     134    if (($service eq "import") || ($service eq "unbuild")) {
     135    $output_info = $self->{'archive_info'};
     136    }
     137    elsif ($service eq "export") {
     138    $output_info = $self->{'export_info'};
     139    }
     140    else {
     141    my $outhandle = $self->{'outhandle'};
     142
     143    print $outhandle "docsave::getoutputinfo did not recognise service ";
     144    print $outhandle " '$service'. No output information available.\n";
     145    }
     146
     147    return $output_info;
     148}
     149
     150
    104151sub set_sortmeta {
    105152    my $self = shift (@_);
     
    122169 
    123170    my $outhandle = $self->{'outhandle'};
    124     my $service = $self->{'service'} || "import";
     171    my $service = $self->{'service'};
    125172
    126173    # Define the SaveAs Type
    127     my $save_as = $self->{'saveas'} || "GA";
     174    my $save_as = $self->{'saveas'};
    128175    my $collection = $self->{'collection'};
    129176
     
    136183    $OID = "NULL" unless defined $OID;
    137184
     185    my $top_section = $doc_obj->get_top_section();
     186
    138187    # get document's directory
    139188    my $doc_dir = $self->get_doc_dir ($OID, $doc_obj->get_source_filename());
     
    141190    # groupsize is 1 (i.e. one document per XML file) so sortmeta
    142191    # may be used
    143 
    144     if ($service eq "import") {
    145     my $archive_info = $self->{'archive_info'};
    146     } elsif ($service eq "export") {
    147     my $export_info = $self->{'export_info'};
    148     } else {
    149     return;
    150     }
    151    
     192   
     193    my $output_info = $self->getoutputinfo();
     194    return if (!defined $output_info);
     195
     196    my $output_dir = $self->getoutputdir();
     197    my $working_dir = &util::filename_cat ($output_dir, $doc_dir);
     198
    152199    # copy all the associated files, add this information as metadata
    153200    # to the document
    154     if ($service eq "export" && $save_as eq "DSpace") {
    155     # create handle file based on doc_dir
    156 
    157     my $doc_handle_file
    158         = &util::filename_cat ($self->{'export_dir'},$doc_dir, "handle");
    159    
    160     if (!open(OUTDOC_EXPORT_HANDLE,">$doc_handle_file")){
     201    if ($save_as eq "DSpace") {
     202
     203    # Genereate handle file
     204    # (Note: this section of code would benefit from being restructured)
     205    my $doc_handle_file = &util::filename_cat ($working_dir, "handle");
     206
     207    my $env_hp = $ENV{'DSPACE_HANDLE_PREFIX'};
     208    my $handle_prefix = (defined $env_hp) ? $env_hp : "123456789";
     209
     210    if (!open(OUTDOC_HANDLE,">$doc_handle_file")){
    161211        print $outhandle "docsave::process could not write collection handle to file $doc_handle_file\n";
    162212        return;
     
    164214
    165215    my ($handle) = ($doc_dir =~ m/^(.*)\.dir$/);
    166     print OUTDOC_EXPORT_HANDLE "123456789/$handle\n";
    167 
    168     close(OUTDOC_EXPORT_HANDLE);
    169 
    170     # open contents file
    171     my $doc_contents_file
    172         = &util::filename_cat ($self->{'export_dir'},$doc_dir, "contents");
    173    
    174     if (!open(OUTDOC_EXPORT_CONTENTS,">$doc_contents_file")){
     216    print OUTDOC_HANDLE "$handle_prefix/$handle\n";
     217
     218    close OUTDOC_HANDLE;
     219
     220    # Generate contents file
     221    my $doc_contents_file = &util::filename_cat ($working_dir, "contents");
     222   
     223    if (!open(OUTDOC_CONTENTS,">$doc_contents_file")){
    175224        print $outhandle "docsave::process could not write collection contents to file $doc_contents_file\n";
    176225        return;
    177226    }
    178     $self->process_assoc_files ($doc_obj, $doc_dir, 'docsave::OUTDOC_EXPORT_CONTENTS');
     227    $self->process_assoc_files ($doc_obj, $doc_dir, 'docsave::OUTDOC_CONTENTS');
     228
     229    close OUTDOC_CONTENTS;
     230
    179231    } else {
    180232    $self->process_assoc_files ($doc_obj, $doc_dir, '');
    181233    }
    182234       
    183     my $doc_file;
    184     my $doc_mets_file;
    185     my $doc_txt_file;
     235    # Save the document in the requested 'save_as' format
     236
     237    if ($save_as eq "GA") {
     238
     239    my $doc_file = &util::filename_cat ($working_dir, "doc.xml");
     240
     241    if (!open (OUTDOC, ">$doc_file")) {
     242        print $outhandle "docsave::process could not write to file $doc_file\n";
     243        return;
     244    }
     245   
     246    # save this document
     247    $self->output_xml_header('docsave::OUTDOC');
     248    $doc_obj->output_section('docsave::OUTDOC',$top_section);
     249    $self->output_xml_footer('docsave::OUTDOC');
     250   
     251    close OUTDOC;
     252    }
     253    elsif ($save_as eq "METS") {
     254   
     255    my $doc_txt_file = &util::filename_cat ($working_dir,"doctxt.xml");
     256
     257    if (!open(OUTDOC_TXT, ">$doc_txt_file")){
     258        print $outhandle "docsave::process could not write to file $doc_txt_file\n";
     259        return;
     260    }
     261   
     262    $self->output_txt_xml_header('docsave::OUTDOC_TXT');
     263    $doc_obj->output_txt_section('docsave::OUTDOC_TXT', $top_section);
     264    $self->output_txt_xml_footer('docsave::OUTDOC_TXT');
     265   
     266    close OUTDOC_TXT;
     267   
     268    # Now save the document with metadata and text structure to docmets.xml
     269   
     270    my $doc_mets_file = &util::filename_cat ($working_dir, "docmets.xml");
     271
     272    my $doc_title = $doc_obj->get_metadata_element($top_section,"dc.Title");
     273    if (!defined $doc_title) {
     274        $doc_title = $doc_obj->get_metadata_element($top_section,"Title");
     275    }
     276
     277    if (!open(OUTDOC_METS,">$doc_mets_file")){
     278        print $outhandle "docsave::process could not write to file $doc_mets_file\n";
     279        return;
     280    }
     281   
     282    my $saveas_version = $self->{'saveas_version'};
     283    $self->output_mets_xml_header('docsave::OUTDOC_METS', $OID, $doc_title);
     284    $doc_obj->output_mets_section('docsave::OUTDOC_METS',$top_section,$saveas_version,$working_dir);
     285    $self->output_mets_xml_footer('docsave::OUTDOC_METS');
     286   
     287    close OUTDOC_METS; 
     288    }
     289    elsif ($save_as eq "DSpace") {
     290
     291    # Generate dublin_core.xml file
     292    my $doc_dc_file = &util::filename_cat ($working_dir, "dublin_core.xml");
     293   
     294    if (!open(OUTDOC_DC,">$doc_dc_file")){
     295        print $outhandle "docsave::process could not write dublin core to file $doc_dc_file\n";
     296        return;
     297    }   
     298   
     299    my $saveas_version = $self->{'saveas_version'};
     300
     301    $self->output_dc_xml_header('docsave::OUTDOC_DC', $OID);
     302    $doc_obj->output_dc_section('docsave::OUTDOC_DC',$top_section);
     303    $self->output_dc_xml_footer('docsave::OUTDOC_DC');
     304   
     305    close OUTDOC_DC;
     306    } else { # save_as isn't one of the recognised types
     307    print $outhandle "docsave::process unrecognised saveas type, $save_as\n";
     308    return;
     309    }
     310
     311
    186312    my $short_doc_file;
    187313
    188     # Save collection as either Greenstone Archive or  METS format
    189     if ($service eq "import") {
    190     my $doc_file
    191         = &util::filename_cat ($self->{'archive_dir'}, $doc_dir, "doc.xml");
    192 
    193     # define doctxt.xml file
    194     my $doc_txt_file
    195         = &util::filename_cat ($self->{'archive_dir'}, $doc_dir,"doctxt.xml");
    196 
    197     my $import_working_dir
    198         =&util::filename_cat ($self->{'archive_dir'}, $doc_dir);
    199    
    200     # define docmets.xml file
    201     my $doc_mets_file
    202         = &util::filename_cat ($self->{'archive_dir'},$doc_dir, "docmets.xml");
    203 
    204     if ($save_as eq "GA") {
    205         $short_doc_file = util::filename_cat ($doc_dir, "doc.xml");
    206     } elsif ($save_as eq "METS") {
    207         #my $short_txt_doc_file=&util::filename_cat ($doc_dir, "doctxt.xml");
    208         $short_doc_file = &util::filename_cat ($doc_dir, "docmets.xml");
    209     } else {
    210         return;
    211     }
    212    
    213     if ($save_as eq "GA") {
    214         if (!open (OUTDOC, ">$doc_file")) {
    215         print $outhandle "docsave::process could not write to file $doc_file\n";
    216         return;
    217         }
    218         # save this document
    219         $self->output_xml_header('docsave::OUTDOC');
    220         $doc_obj->output_section('docsave::OUTDOC',
    221                  $doc_obj->get_top_section());
    222         $self->output_xml_footer('docsave::OUTDOC');
    223        
    224         close OUTDOC;
    225     } elsif ($save_as eq "METS") {
    226         # save the document without metadata:doctxt.xml
    227        
    228         if (!open(OUTDOC_TXT, ">$doc_txt_file")){
    229         print $outhandle "docsave::process could not write to file $doc_txt_file\n";
    230         return;
    231         }
    232        
    233         $self->output_txt_xml_header('docsave::OUTDOC_TXT');
    234         $doc_obj->output_txt_section('docsave::OUTDOC_TXT', $doc_obj->get_top_section());
    235         #$self->output_txt_xml_footer('docsave::OUTDOC_TXT');
    236        
    237         # Convert doctxt.xml file to docmets.xml
    238         if (!open(OUTDOC_METS,">$doc_mets_file")){
    239         print $outhandle "docsave::process could not write to file $doc_mets_file\n";
    240         return;
    241         }
    242        
    243         $self->output_mets_xml_header('docsave::OUTDOC_METS', $OID);
    244         $doc_obj->output_mets_section('docsave::OUTDOC_METS',
    245                       $doc_obj->get_top_section());
    246         $self->output_mets_xml_footer('docsave::OUTDOC_METS');
    247        
    248         close OUTDOC_TXT;
    249         close OUTDOC_METS;
    250     } else { # save_as isn't GA or METS
    251         print $outhandle "docsave::process unrecognised saveas type, $save_as\n";
    252         return;
    253     }
    254     }
    255    
    256     ## Export the collection to METs format or DSpace Archive Format into the export directory
    257     if ($service eq "export") {
    258     my $doc_dc_file;
    259     my $doc_contents_file;
    260 
    261     my $export_working_dir
    262         =&util::filename_cat ($self->{'export_dir'}, $doc_dir);
    263        
    264     if ($save_as eq "METS") {
    265         $doc_mets_file
    266         = &util::filename_cat ($self->{'export_dir'},$doc_dir, "docmets.xml");
    267        
    268         $doc_txt_file
    269         = &util::filename_cat ($self->{'export_dir'},$doc_dir, "doctxt.xml");
    270        
    271         if (!open(OUTDOC_EXPORT_TXT, ">$doc_txt_file")){
    272         print $outhandle "docsave::process could not write TXT to file $doc_txt_file\n";
    273         return;
    274         }
    275        
    276         $self->output_txt_xml_header('docsave::OUTDOC_EXPORT_TXT');
    277         $doc_obj->output_txt_section('docsave::OUTDOC_EXPORT_TXT', $doc_obj->get_top_section());
    278        
    279         if (!open(OUTDOC_EXPORT_METS,">$doc_mets_file")){
    280         print $outhandle "docsave::process could not write METS format to file $doc_mets_file\n";
    281         return;
    282         }
    283         $self->output_mets_xml_header('docsave::OUTDOC_EXPORT_METS', $OID);
    284         $doc_obj->output_mets_section('docsave::OUTDOC_EXPORT_METS',$doc_obj->get_top_section(), $export_working_dir);
    285         $self->output_mets_xml_footer('docsave::OUTDOC_EXPORT_METS');
    286    
    287         close OUTDOC_EXPORT_TXT;
    288         close OUTDOC_EXPORT_METS;
    289     } elsif ($save_as eq "DSpace") {
    290 
    291         # Generate dublin_core.xml file
    292         $doc_dc_file
    293         = &util::filename_cat ($self->{'export_dir'},$doc_dir, "dublin_core.xml");
    294    
    295         if (!open(OUTDOC_EXPORT_DC,">$doc_dc_file")){
    296         print $outhandle "docsave::process could not write dublin core to file $doc_dc_file\n";
    297         return;
    298         }   
    299        
    300         $self->output_dc_xml_header('docsave::OUTDOC_EXPORT_DC', $OID);
    301         $doc_obj->output_dc_section('docsave::OUTDOC_EXPORT_DC',$doc_obj->get_top_section(), $export_working_dir);
    302         $self->output_dc_xml_footer('docsave::OUTDOC_EXPORT_DC');
    303    
    304         close OUTDOC_EXPORT_DC;
    305         close OUTDOC_EXPORT_CONTENTS;
    306     } else { # save_as isn't METS or DSpace
    307         print $outhandle "docsave::process unrecognised saveas type, $save_as\n";
    308         return;
    309     }
    310 
    311     if ($save_as eq "METS") {
    312         $short_doc_file = util::filename_cat ($doc_dir, "docmets.xml");
    313     } elsif ($save_as eq "DSpace") {
    314         #my $short_txt_doc_file=&util::filename_cat ($doc_dir, "doctxt.xml");
    315         $short_doc_file=&util::filename_cat ($doc_dir, "dublin_core.xml");
    316     } else {
    317         return;
    318     }
    319    
    320     }
     314    if ($save_as eq "GA") {
     315    $short_doc_file = util::filename_cat ($doc_dir, "doc.xml");
     316    } elsif ($save_as eq "METS") {
     317    $short_doc_file = &util::filename_cat ($doc_dir, "docmets.xml");
     318    } elsif ($save_as eq "DSpace") {
     319    $short_doc_file=&util::filename_cat ($doc_dir, "dublin_core.xml");
     320    } else {
     321    return;
     322    }
     323
    321324    #save for later (for close_file_output())
    322325    $self->{'short_doc_file'} = $short_doc_file;   
     
    336339    my ($metadata);
    337340    if (defined ($self->{'sortmeta'})) {
    338     $metadata = $doc_obj->get_metadata_element($doc_obj->get_top_section(),
    339                            $self->{'sortmeta'});
     341    $metadata = $doc_obj->get_metadata_element($top_section,$self->{'sortmeta'});
    340342    }
    341343    if (defined ($metadata) && $metadata) {
     
    349351    $metadata = &sorttools::format_metadata_for_sorting($self->{'sortmeta'}, $metadata, $doc_obj);
    350352    }
    351     # store reference in the archive_info and export_info
    352     if ($service eq "export") {
    353     $self->{'export_info'}->add_info($OID, $short_doc_file, $metadata);
    354     } elsif ($service eq "import") {
    355     $self->{'archive_info'}->add_info($OID, $short_doc_file, $metadata);
    356     }
     353
     354    # store reference in the relevant info object (archive_info,export_info,...)
     355    $output_info->add_info($OID, $short_doc_file, $metadata);
    357356}
    358357
     
    414413    my $self = shift (@_);
    415414    my ($OID, $source_filename) = @_;
    416     my $doc_info;
     415
     416    my $service = $self-> {'service'};
     417
     418    my $working_dir  = $self->getoutputdir();
     419    my $working_info = $self->getoutputinfo();
     420    return if (!defined $working_info);
     421
     422    my $doc_info = $working_info->get_info($OID);
    417423    my $doc_dir = '';
    418     my $service = $self-> {'service'};
    419     my $working_dir;
    420     my $working_info;
    421 
    422     if ($service eq "import") {
    423     $doc_info = $self->{'archive_info'}->get_info($OID);
    424     $working_dir = $self->{'archive_dir'};
    425     $working_info = $self->{'archive_info'};
    426     } elsif ($service eq "export") {
    427     $doc_info =$self->{'export_info'}->get_info($OID);
    428         $working_dir = $self->{'export_dir'};
    429     $working_info = $self->{'export_info'};
    430     } else {
    431     return;
    432     }
     424
    433425    if (defined $doc_info && scalar(@$doc_info) >= 1) {
    434426    # this OID already has an assigned directory, use the
     
    449441    # have to get a new document directory
    450442
    451     if ($service eq "import") {
     443    if (($service eq "import") || ($service eq "unbuild")) {
    452444        my $doc_dir_rest = $OID;
    453445        my $doc_dir_num = 0;
     
    465457    else {
    466458        # Export formats such as DSpace need the directory structure to
    467         # be flat.  This is simple to arrange (set 'doc_dir' to bit the
     459        # be flat.  This is simple to arrange (set 'doc_dir' to be the
    468460        # documents OID) but breaks Windows 3.1 file system compliance.
    469         # Such a loss is not a bit thing in this situation as such
     461        # Such a loss is not a big thing in this situation as such
    470462        # systems don't run on Windows 3.1 anyway.
    471463
     
    485477
    486478    my $outhandle = $self->{'outhandle'};
    487    
     479    my $service = $self->{'service'};
     480    my $save_as = $self->{'saveas'};
     481
     482    my $output_dir = $self->getoutputdir();
     483    return if (!defined $output_dir);
     484
     485    my $working_dir = &util::filename_cat($output_dir, $doc_dir);
     486
    488487    my @assoc_files = ();
    489488    my $filename;;
    490     my $working_dir;
    491     my $service = $self->{'service'};
    492     my $save_as = $self->{'saveas'};
    493 
    494     if ($service eq "import") {
    495     $working_dir = $self->{'archive_dir'};
    496     } elsif ($service eq "export"){
    497     $working_dir = $self->{'export_dir'};
    498     } else {
    499     return;
    500     }
    501489
    502490    my $source_filename = $doc_obj->get_source_filename();
     
    523511    print $handle "$tail_filename\n";
    524512
    525     $filename = &util::filename_cat($working_dir, $doc_dir, $tail_filename);
     513    $filename = &util::filename_cat($working_dir, $tail_filename);
    526514    &util::hard_link ($source_filename, $filename);
    527515    }
     
    552540        }
    553541
    554         $filename = &util::filename_cat($working_dir, $doc_dir, $afile);
     542        $filename = &util::filename_cat($working_dir, $afile);
    555543
    556544
     
    605593    }
    606594
    607     # store reference in the archive_info and export_infor
    608     if ($service eq "import") {
    609     $self->{'archive_info'}->add_info($OID, $short_doc_file);
    610     } elsif ($service eq "export") {
    611     $self->{'export_info'}->add_info($OID, $short_doc_file);
    612     } else {
    613     return;
    614     }
     595    # store reference in relevant info object (archive_info,export_info,...)
     596    my $output_info = $self->getoutputinfo();
     597    return 0 if (!defined $output_info);
     598    $output_info->add_info($OID, $short_doc_file);
     599
    615600    return 1;
    616601}
     
    643628    my $self = shift(@_);
    644629    my ($handle) = @_;
    645     print $handle "<the end of the file>\n";
     630    # Nothing needs to be output at present
    646631}
    647632
    648633sub output_mets_xml_header(){
    649634    my $self = shift(@_);
    650     my ($handle, $OID) = @_;
     635    my ($handle, $OID, $doc_title) = @_;
     636
     637    my $version = $self->{'saveas_version'};
     638
     639    my $extra_attr = "";
     640    if ($version eq "fedora") {
     641    my $fnamespace = $ENV{'FEDORA_PID_NAMESPACE'};
     642    my $oid_namespace = (defined $fnamespace) ? $fnamespace : "test";
     643
     644    $extra_attr = "OBJID=\"$oid_namespace:$OID\" TYPE=\"FedoraObject\" LABEL=\"$doc_title\"";
     645    }
     646    else {
     647    # Greenstone METS profile
     648    $extra_attr = "OBJID=\"$OID:2\"";
     649    }
     650
    651651
    652652    print $handle '<?xml version="1.0" encoding="UTF-8" standalone="no"?>' . "\n";
     
    659659    print $handle '           http://www.greenstone.org/namespace/gsdlmetadata/1.0/' . "\n";
    660660    print $handle '           http://www.greenstone.org/namespace/gsdlmetadata/1.0/gsdl_metadata.xsd"' . "\n";
    661     print $handle '           OBJID="'. $OID. ':2">' . "\n";
     661    print $handle "  $extra_attr>\n";
     662
     663    if ($version eq "fedora") {
     664    print $handle '<mets:metsHdr RECORDSTATUS="A"/>'. "\n"; # A = active
     665    }
     666
    662667}
    663668
Note: See TracChangeset for help on using the changeset viewer.