Changeset 22413

Show
Ignore:
Timestamp:
16.07.2010 14:13:01 (9 years ago)
Author:
davidb
Message:

Initial pass at restructuring the main code of import.pl (and the very similar export.pl) as a shared module

Location:
main/trunk/greenstone2
Files:
2 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/bin/script/import.pl

    r22037 r22413  
    6464 
    6565use strict; 
    66 no strict 'refs'; # allow filehandles to be variables and vice versa 
    67 no strict 'subs'; # allow barewords (eg STDERR) as function arguments 
    68  
    69 use arcinfo; 
    70 use colcfg; 
    71 use dbutil; 
    72 use plugin; 
    73 use plugout; 
    74 use manifest; 
    7566use inexport; 
    76 use util; 
    77 use scriptutil; 
    78 use FileHandle; 
    79 use gsprintf 'gsprintf'; 
    80 use printusage; 
    81 use parse2; 
    82  
    83  
    8467 
    8568my $oidtype_list =  
     
    258241    'range' => "0,", 
    259242    # parsearg left "" as default 
    260     #'deft' => "2", 
     243    'deft' => "2", 
    261244    'reqd' => "no", 
    262245    'modegli' => "3" }, 
     
    277260 
    278261 
     262 
     263sub main  
     264{ 
     265    my $inexport = new inexport(\@ARGV,$options); 
     266     
     267    my $collection = $inexport->get_collection(); 
     268    my $collect_cfg = $inexport->read_collection_cfg($collection,$options);     
     269    $inexport->set_collection_options("import",$collect_cfg); 
     270     
     271    my $pluginfo = $inexport->process_files("import",$collect_cfg); 
     272 
     273    $inexport->generate_statistics("import",$pluginfo); 
     274} 
     275 
     276 
    279277&main(); 
    280  
    281 sub main { 
    282     # params 
    283     my ($language, $verbosity, $debug, 
    284     $collectdir, $importdir, $archivedir, $site, $manifest,  
    285     $incremental, $incremental_mode, $keepold, $removeold,  
    286     $saveas, 
    287     $OIDtype, $OIDmetadata,  
    288     $maxdocs, $statsfile, 
    289     $out, $faillog, $gli, 
    290     $gzip, $groupsize,  
    291     $sortmeta, $removeprefix, $removesuffix  
    292     ); 
    293  
    294     my $xml = 0; 
    295  
    296     # other vars 
    297     my ($configfilename, $collection, $collectcfg,  
    298     $arcinfo_doc_filename, $arcinfo_src_filename, $archive_info,  
    299     $gs_mode, 
    300     $processor, $pluginfo); 
    301  
    302     my $service = "import"; 
    303  
    304     my $hashParsingResult = {}; 
    305     # general options available to all plugins 
    306     my $intArgLeftinAfterParsing = parse2::parse(\@ARGV,$arguments,$hashParsingResult,"allow_extra_options"); 
    307     # Parse returns -1 if something has gone wrong 
    308     if ($intArgLeftinAfterParsing == -1) 
    309     { 
    310     &PrintUsage::print_txt_usage($options, "{import.params}"); 
    311     die "\n"; 
    312     } 
    313      
    314     foreach my $strVariable (keys %$hashParsingResult) 
    315     { 
    316     eval "\$$strVariable = \$hashParsingResult->{\"\$strVariable\"}"; 
    317     } 
    318  
    319     # If $language has been specified, load the appropriate resource bundle 
    320     # (Otherwise, the default resource bundle will be loaded automatically) 
    321     if ($language && $language =~ /\S/) { 
    322     &gsprintf::load_language_specific_resource_bundle($language); 
    323     } 
    324  
    325     if ($xml) { 
    326         &PrintUsage::print_xml_usage($options); 
    327     print "\n"; 
    328     return; 
    329     } 
    330  
    331     if ($gli) { # the gli wants strings to be in UTF-8 
    332     &gsprintf::output_strings_in_UTF8;  
    333     } 
    334      
    335     # now check that we had exactly one leftover arg, which should be  
    336     # the collection name. We don't want to do this earlier, cos  
    337     # -xml arg doesn't need a collection name 
    338     # Or if the user specified -h, then we output the usage also 
    339     if ($intArgLeftinAfterParsing != 1 || (@ARGV && $ARGV[0] =~ /^\-+h/)) 
    340     { 
    341     &PrintUsage::print_txt_usage($options, "{import.params}"); 
    342     die "\n"; 
    343     } 
    344  
    345     my $close_out = 0; 
    346     if ($out !~ /^(STDERR|STDOUT)$/i) { 
    347     open (OUT, ">$out") || 
    348         (&gsprintf(STDERR, "{common.cannot_open_output_file}: $!\n", $out) && die); 
    349     $out = 'import::OUT'; 
    350     $close_out = 1; 
    351     } 
    352     $out->autoflush(1); 
    353  
    354     # get and check the collection name 
    355     if (($collection = &colcfg::use_collection($site, @ARGV, $collectdir)) eq "") { 
    356     &PrintUsage::print_txt_usage($options, "{import.params}"); 
    357     die "\n"; 
    358     } 
    359  
    360     # add collection's perllib dir  into include path in  
    361     # case we have collection specific modules 
    362     unshift (@INC, "$ENV{'GSDLCOLLECTDIR'}/perllib"); 
    363  
    364     # check that we can open the faillog 
    365     if ($faillog eq "") { 
    366     $faillog = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "etc", "fail.log"); 
    367     } 
    368     open (FAILLOG, ">$faillog") || 
    369     (&gsprintf(STDERR, "{import.cannot_open_fail_log}\n", $faillog) && die); 
    370  
    371      
    372     my $faillogname = $faillog; 
    373     $faillog = 'import::FAILLOG'; 
    374     $faillog->autoflush(1); 
    375      
    376     # Read in the collection configuration file. 
    377     ($configfilename, $gs_mode) = &colcfg::get_collect_cfg_name($out); 
    378     $collectcfg = &colcfg::read_collection_cfg ($configfilename, $gs_mode); 
    379      
    380     # If the infodbtype value wasn't defined in the collect.cfg file, use the default 
    381     if (!defined($collectcfg->{'infodbtype'})) 
    382     { 
    383       $collectcfg->{'infodbtype'} = &dbutil::get_default_infodb_type(); 
    384     } 
    385  
    386     if (defined $collectcfg->{'importdir'} && $importdir eq "") { 
    387     $importdir = $collectcfg->{'importdir'}; 
    388     } 
    389     if (defined $collectcfg->{'archivedir'} && $archivedir eq "") { 
    390     $archivedir = $collectcfg->{'archivedir'}; 
    391     } 
    392     # fill in the default import and archives directories if none 
    393     # were supplied, turn all \ into / and remove trailing / 
    394     $importdir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "import") if $importdir eq ""; 
    395     $importdir =~ s/[\\\/]+/\//g; 
    396     $importdir =~ s/\/$//; 
    397     if (!-e $importdir) { 
    398     &gsprintf($out, "{import.no_import_dir}\n\n", $importdir); 
    399     die "\n"; 
    400     } 
    401  
    402     $archivedir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "archives") if $archivedir eq ""; 
    403     $archivedir =~ s/[\\\/]+/\//g; 
    404     $archivedir =~ s/\/$//; 
    405  
    406     my $plugins = []; 
    407     if (defined $collectcfg->{'plugin'}) { 
    408     $plugins = $collectcfg->{'plugin'}; 
    409     } 
    410     #some global options for the plugins 
    411     my @global_opts = (); 
    412  
    413     if ($verbosity !~ /\d+/) { 
    414     if (defined $collectcfg->{'verbosity'} && $collectcfg->{'verbosity'} =~ /\d+/) { 
    415         $verbosity = $collectcfg->{'verbosity'}; 
    416     } else { 
    417         $verbosity = 2; # the default 
    418     } 
    419     } 
    420     if (defined $collectcfg->{'manifest'} && $manifest eq "") { 
    421     $manifest = $collectcfg->{'manifest'}; 
    422     } 
    423  
    424     if (defined $collectcfg->{'gzip'} && !$gzip) { 
    425     if ($collectcfg->{'gzip'} =~ /^true$/i) { 
    426         $gzip = 1; 
    427     } 
    428     } 
    429  
    430     if ($maxdocs !~ /\-?\d+/) { 
    431     if (defined $collectcfg->{'maxdocs'} && $collectcfg->{'maxdocs'} =~ /\-?\d+/) { 
    432         $maxdocs = $collectcfg->{'maxdocs'}; 
    433     } else { 
    434         $maxdocs = -1; # the default 
    435     } 
    436     } 
    437     if ($groupsize == 1) { 
    438     if (defined $collectcfg->{'groupsize'} && $collectcfg->{'groupsize'} =~ /\d+/) { 
    439         $groupsize = $collectcfg->{'groupsize'}; 
    440     } 
    441     } 
    442  
    443     if (!defined $OIDtype || ($OIDtype !~ /^(hash|incremental|assigned|dirname)$/ )) { 
    444     if (defined $collectcfg->{'OIDtype'} && $collectcfg->{'OIDtype'} =~ /^(hash|incremental|assigned|dirname)$/) { 
    445         $OIDtype = $collectcfg->{'OIDtype'}; 
    446     } else { 
    447         $OIDtype = "hash"; # the default 
    448     } 
    449     } 
    450  
    451     if ((!defined $OIDmetadata) || ($OIDmetadata eq "")) { 
    452     if (defined $collectcfg->{'OIDmetadata'}) { 
    453         $OIDmetadata = $collectcfg->{'OIDmetadata'}; 
    454     } else { 
    455         $OIDmetadata = "dc.Identifier"; # the default 
    456     } 
    457     } 
    458  
    459     if (defined $collectcfg->{'sortmeta'} && (!defined $sortmeta || $sortmeta eq "")) { 
    460     $sortmeta = $collectcfg->{'sortmeta'}; 
    461     } 
    462     # sortmeta cannot be used with group size 
    463     $sortmeta = undef unless defined $sortmeta && $sortmeta =~ /\S/; 
    464     if (defined $sortmeta && $groupsize > 1) { 
    465     &gsprintf($out, "{import.cannot_sort}\n\n"); 
    466     $sortmeta = undef; 
    467     } 
    468      
    469     if (defined $collectcfg->{'removeprefix'} && $removeprefix eq "") { 
    470     $removeprefix = $collectcfg->{'removeprefix'}; 
    471     } 
    472      
    473     if (defined $collectcfg->{'removesuffix'} && $removesuffix eq "") { 
    474     $removesuffix = $collectcfg->{'removesuffix'}; 
    475     } 
    476     if (defined $collectcfg->{'debug'} && $collectcfg->{'debug'} =~ /^true$/i) { 
    477     $debug = 1; 
    478     } 
    479     if (defined $collectcfg->{'gli'} && $collectcfg->{'gli'} =~ /^true$/i) { 
    480     $gli = 1; 
    481     } 
    482     $gli = 0 unless defined $gli; 
    483         
    484     # check keepold and removeold 
    485     ($removeold, $keepold, $incremental, $incremental_mode)  
    486     = &scriptutil::check_removeold_and_keepold($removeold, $keepold,  
    487                            $incremental, "archives",  
    488                            $collectcfg); 
    489   
    490  
    491     print STDERR "<Import>\n" if $gli; 
    492      
    493     my $manifest_lookup = new manifest($collectcfg->{'infodbtype'},$archivedir); 
    494     if ($manifest ne "") {   
    495     my $manifest_filename = $manifest; 
    496  
    497     if ($manifest_filename !~ m/^[\\\/]/) { 
    498         $manifest_filename = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, $manifest_filename); 
    499     } 
    500  
    501     $manifest =~ s/[\\\/]+/\//g; 
    502     $manifest =~ s/\/$//; 
    503  
    504     $manifest_lookup->parse($manifest_filename); 
    505     } 
    506  
    507  
    508     # load all the plugins 
    509     $pluginfo = &plugin::load_plugins ($plugins, $verbosity, $out, $faillog, \@global_opts, $incremental_mode); 
    510     if (scalar(@$pluginfo) == 0) { 
    511     &gsprintf($out, "{import.no_plugins_loaded}\n"); 
    512     die "\n"; 
    513     } 
    514  
    515     # remove the old contents of the archives directory (and tmp directory) if needed 
    516     if ($removeold) { 
    517     if (-e $archivedir) { 
    518         &gsprintf($out, "{import.removing_archives}\n"); 
    519         &util::rm_r ($archivedir); 
    520     } 
    521     my $tmpdir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "tmp"); 
    522     $tmpdir =~ s/[\\\/]+/\//g; 
    523     $tmpdir =~ s/\/$//; 
    524     if (-e $tmpdir) { 
    525         &gsprintf($out, "{import.removing_tmpdir}\n"); 
    526         &util::rm_r ($tmpdir); 
    527     } 
    528     } 
    529  
    530     # create the archives dir if needed 
    531     &util::mk_all_dir($archivedir); 
    532  
    533     # read the archive information file 
    534 ##  $arcinfo_doc_filename = &util::filename_cat ($archivedir, "archives.inf"); 
    535  
    536     # BACKWARDS COMPATIBILITY: Just in case there are old .ldb/.bdb files (won't do anything for other infodbtypes) 
    537     &util::rename_ldb_or_bdb_file(&util::filename_cat($archivedir, "archiveinf-doc")); 
    538     &util::rename_ldb_or_bdb_file(&util::filename_cat($archivedir, "archiveinf-src")); 
    539  
    540     $arcinfo_doc_filename = &dbutil::get_infodb_file_path($collectcfg->{'infodbtype'}, "archiveinf-doc", $archivedir); 
    541     $arcinfo_src_filename = &dbutil::get_infodb_file_path($collectcfg->{'infodbtype'}, "archiveinf-src", $archivedir); 
    542                              
    543     $archive_info = new arcinfo ($collectcfg->{'infodbtype'}); 
    544     $archive_info->load_info ($arcinfo_doc_filename); 
    545  
    546     if ($manifest eq "") { 
    547     # Load in list of files in import folder from last import (if present) 
    548     $archive_info->load_prev_import_filelist ($arcinfo_src_filename); 
    549     } 
    550  
    551     ####Use Plugout#### 
    552     my ($plugout);  
    553     if (defined $collectcfg->{'plugout'}) { 
    554     # If a plugout was specified in the collect.cfg file, assume it is sensible 
    555     # We can't check the name because it could be anything, if it is a custom plugout 
    556     $plugout = $collectcfg->{'plugout'}; 
    557     } 
    558     else{ 
    559     if ($saveas !~ /^(GreenstoneXML|GreenstoneMETS)$/) { 
    560         push @$plugout,"GreenstoneXMLPlugout"; 
    561     } 
    562     else{ 
    563         push @$plugout,$saveas."Plugout"; 
    564     } 
    565     } 
    566  
    567     push @$plugout,("-output_info",$archive_info) if (defined $archive_info);  
    568     push @$plugout,("-verbosity",$verbosity) if (defined $verbosity); 
    569     push @$plugout,("-gzip_output") if ($gzip); 
    570     push @$plugout,("-group_size",$groupsize) if (defined $groupsize); 
    571     push @$plugout,("-output_handle",$out) if (defined $out); 
    572     push @$plugout,("-debug") if ($debug); 
    573      
    574     $processor = &plugout::load_plugout($plugout);                         
    575     $processor->setoutputdir ($archivedir); 
    576     $processor->set_sortmeta ($sortmeta, $removeprefix, $removesuffix) if defined $sortmeta; 
    577     $processor->set_OIDtype ($OIDtype, $OIDmetadata); 
    578      
    579     &plugin::begin($pluginfo, $importdir, $processor, $maxdocs, $gli); 
    580      
    581     if ($removeold) { 
    582         # occasionally, plugins may want to do something on remove old, eg pharos image indexing 
    583     &plugin::remove_all($pluginfo, $importdir, $processor, $maxdocs, $gli); 
    584     } 
    585     if ($manifest eq "") { 
    586     # process the import directory 
    587     my $block_hash = {}; 
    588     my $metadata = {}; 
    589     # gobal blocking pass may set up some metadata 
    590     &plugin::file_block_read($pluginfo, $importdir, "", $block_hash, $metadata, $gli); 
    591  
    592  
    593     if ($incremental || $incremental_mode eq "onlyadd") { 
    594  
    595         &inexport::prime_doc_oid_count($archivedir); 
    596  
    597  
    598         # Can now work out which files were new, already existed, and have 
    599         # been deleted 
    600          
    601         &inexport::new_vs_old_import_diff($archive_info,$block_hash,$importdir, 
    602                           $archivedir,$verbosity,$incremental_mode); 
    603          
    604         my @new_files = sort keys %{$block_hash->{'new_files'}}; 
    605         if (scalar(@new_files>0)) { 
    606         print STDERR "New files and modified metadata files since last import:\n  "; 
    607         print STDERR join("\n  ",@new_files), "\n"; 
    608         } 
    609  
    610         if ($incremental) { 
    611                # only look for deletions if we are truely incremental 
    612         my @deleted_files = sort keys %{$block_hash->{'deleted_files'}}; 
    613         # Filter out any in gsdl/tmp area 
    614         my @filtered_deleted_files = (); 
    615         my $gsdl_tmp_area = &util::filename_cat($ENV{'GSDLHOME'}, "tmp"); 
    616         my $collect_tmp_area = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "tmp"); 
    617         $gsdl_tmp_area = &util::filename_to_regex($gsdl_tmp_area); 
    618         $collect_tmp_area = &util::filename_to_regex($collect_tmp_area); 
    619                    
    620         foreach my $df (@deleted_files) { 
    621             next if ($df =~ m/^$gsdl_tmp_area/); 
    622             next if ($df =~ m/^$collect_tmp_area/); 
    623              
    624             push(@filtered_deleted_files,$df); 
    625         }        
    626          
    627  
    628         @deleted_files = @filtered_deleted_files; 
    629          
    630         if (scalar(@deleted_files)>0) { 
    631             print STDERR "Files deleted since last import:\n  "; 
    632             print STDERR join("\n  ",@deleted_files), "\n"; 
    633          
    634          
    635             &plugin::remove_some($pluginfo, $collectcfg->{'infodbtype'}, $archivedir, \@deleted_files); 
    636              
    637             &inexport::mark_docs_for_deletion($archive_info,$block_hash,\@deleted_files, $archivedir,$verbosity, "delete"); 
    638         } 
    639          
    640         my @reindex_files = sort keys %{$block_hash->{'reindex_files'}}; 
    641          
    642         if (scalar(@reindex_files)>0) { 
    643             print STDERR "Files to reindex since last import:\n  "; 
    644             print STDERR join("\n  ",@reindex_files), "\n"; 
    645             &plugin::remove_some($pluginfo, $collectcfg->{'infodbtype'}, $archivedir, \@reindex_files); 
    646             &inexport::mark_docs_for_deletion($archive_info,$block_hash,\@reindex_files, $archivedir,$verbosity, "reindex"); 
    647         } 
    648                  
    649         } 
    650          
    651         # Play it safe, and run through the entire folder, only processing new or edited files 
    652         &plugin::read ($pluginfo, $importdir, "", $block_hash, $metadata, $processor, $maxdocs, 0, $gli); 
    653  
    654     } 
    655     else { 
    656         &plugin::read ($pluginfo, $importdir, "", $block_hash, $metadata, $processor, $maxdocs, 0, $gli); 
    657     } 
    658  
    659     } 
    660     else 
    661     { 
    662     #  
    663     # 1. Process delete files first 
    664     #  
    665  
    666     my @deleted_files = keys %{$manifest_lookup->{'delete'}}; 
    667     my @full_deleted_files = (); 
    668  
    669     # ensure all filenames are absolute 
    670     foreach my $df (@deleted_files) {        
    671         my $full_df = 
    672         (&util::filename_is_absolute($df))  
    673         ? $df 
    674         : &util::filename_cat($importdir,$df); 
    675  
    676         push(@full_deleted_files,$full_df); 
    677     } 
    678      
    679     &plugin::remove_some($pluginfo, $collectcfg->{'infodbtype'}, $archivedir, \@full_deleted_files); 
    680     &inexport::mark_docs_for_deletion($archive_info,{}, 
    681                       \@full_deleted_files, 
    682                       $archivedir, $verbosity, "delete"); 
    683  
    684  
    685     #  
    686     # 2. Now files for reindexing 
    687     #  
    688  
    689     my @reindex_files = keys %{$manifest_lookup->{'reindex'}}; 
    690     my @full_reindex_files = (); 
    691  
    692     # ensure all filenames are absolute 
    693     foreach my $rf (@reindex_files) {        
    694         my $full_rf = 
    695         (&util::filename_is_absolute($rf))  
    696         ? $rf 
    697         : &util::filename_cat($importdir,$rf); 
    698  
    699         push(@full_reindex_files,$full_rf); 
    700     } 
    701      
    702     &plugin::remove_some($pluginfo, $collectcfg->{'infodbtype'}, $archivedir, \@full_reindex_files); 
    703     &inexport::mark_docs_for_deletion($archive_info,{},\@full_reindex_files, $archivedir,$verbosity, "reindex"); 
    704  
    705     # And now ensure the new version of the file processed by appropriate 
    706     # plugin 
    707     foreach my $full_rf (@full_reindex_files) { 
    708         &plugin::read ($pluginfo, "", $full_rf, {}, {}, $processor, $maxdocs, 0, $gli); 
    709     } 
    710  
    711  
    712     #  
    713     # 3. Now finally any new files 
    714     #  
    715  
    716     foreach my $file (keys %{$manifest_lookup->{'index'}}) { 
    717         &plugin::read ($pluginfo, $importdir, $file, {}, {}, $processor, $maxdocs, 0, $gli); 
    718     } 
    719  
    720  
    721     } 
    722  
    723     &plugin::end($pluginfo, $processor); 
    724  
    725     &plugin::deinit($pluginfo, $processor); 
    726  
    727     # Store the value of OIDCount (used in doc.pm) so it can be 
    728     # restored correctly to this value on an incremental build 
    729     &inexport::store_doc_oid_count($archivedir); 
    730  
    731     # write out the archive information file 
    732     $processor->close_file_output() if $groupsize > 1; 
    733     $processor->close_group_output() if $processor->is_group(); 
    734  
    735 # The following 'if' statement is in the export.pl version of the script, 
    736 # The reason for the 'if' statement is now given in export.pl 
    737 # Unclear at this point if the same should be done here 
    738 ##    if (($saveas =~ m/^.*METS$/) || ($saveas eq "MARC")) { 
    739     # Not all export types need this (e.g. DSpace) 
    740  
    741     # should we still do this in debug mode?? 
    742  
    743     # for backwards compatability with archvies.inf file 
    744     if ($arcinfo_doc_filename =~ m/(contents)|(\.inf)$/) { 
    745     $archive_info->save_info($arcinfo_doc_filename); 
    746     } 
    747     else { 
    748     $archive_info->save_revinfo_db($arcinfo_src_filename); 
    749     } 
    750  
    751  
    752 ##    } 
    753      
    754     # write out import stats 
    755     my $close_stats = 0; 
    756     if ($statsfile !~ /^(STDERR|STDOUT)$/i) { 
    757     if (open (STATS, ">$statsfile")) { 
    758         $statsfile = 'import::STATS'; 
    759         $close_stats = 1; 
    760     } else { 
    761         &gsprintf($out, "{import.cannot_open_stats_file}", $statsfile); 
    762         &gsprintf($out, "{import.stats_backup}\n"); 
    763         $statsfile = 'STDERR'; 
    764     } 
    765     } 
    766  
    767     &gsprintf($out, "\n"); 
    768     &gsprintf($out, "*********************************************\n"); 
    769     &gsprintf($out, "{import.complete}\n"); 
    770     &gsprintf($out, "*********************************************\n"); 
    771  
    772     &plugin::write_stats($pluginfo, $statsfile, $faillogname, $gli); 
    773     if ($close_stats) { 
    774     close STATS; 
    775     } 
    776  
    777     close OUT if $close_out; 
    778     close FAILLOG; 
    779 } 
  • main/trunk/greenstone2/perllib/inexport.pm

    r22327 r22413  
    11########################################################################### 
    22# 
    3 # inexport.pm -- useful utilities to support import.pl and export.pl 
     3# inexport.pm -- useful class to support import.pl and export.pl 
    44# A component of the Greenstone digital library software 
    55# from the New Zealand Digital Library Project at the  
     
    2828use strict; 
    2929 
    30 use File::Basename; 
    31  
     30no strict 'refs'; # allow filehandles to be variables and vice versa 
     31no strict 'subs'; # allow barewords (eg STDERR) as function arguments 
     32 
     33use arcinfo; 
     34use colcfg; 
     35use dbutil; 
     36use plugin; 
     37use plugout; 
     38use manifest; 
     39use inexport; 
    3240use dbutil; 
    3341use util; 
     42use scriptutil; 
     43use FileHandle; 
     44use gsprintf 'gsprintf'; 
     45use printusage; 
     46use parse2; 
     47 
     48use File::Basename; 
     49 
     50sub new  
     51{ 
     52    my $class = shift (@_); 
     53    my ($argv,$options) = @_; 
     54 
     55    my $self = { 'xml' => 0 }; 
     56 
     57    # general options available to all plugins 
     58    my $arguments = $options->{'args'}; 
     59    my $intArgLeftinAfterParsing = parse2::parse($argv,$arguments,$self,"allow_extra_options"); 
     60    # Parse returns -1 if something has gone wrong 
     61    if ($intArgLeftinAfterParsing == -1) 
     62    { 
     63    &PrintUsage::print_txt_usage($options, "{import.params}"); 
     64    die "\n"; 
     65    } 
     66     
     67    my $language = $self->{'language'}; 
     68    # If $language has been specified, load the appropriate resource bundle 
     69    # (Otherwise, the default resource bundle will be loaded automatically) 
     70    if ($language && $language =~ /\S/) { 
     71    &gsprintf::load_language_specific_resource_bundle($language); 
     72    } 
     73 
     74    if ($self->{'xml'}) { 
     75        &PrintUsage::print_xml_usage($options); 
     76    print "\n"; 
     77    return; 
     78    } 
     79 
     80    if ($self->{'gli'}) { # the gli wants strings to be in UTF-8 
     81    &gsprintf::output_strings_in_UTF8;  
     82    } 
     83     
     84    # now check that we had exactly one leftover arg, which should be  
     85    # the collection name. We don't want to do this earlier, cos  
     86    # -xml arg doesn't need a collection name 
     87    # Or if the user specified -h, then we output the usage also 
     88    if ($intArgLeftinAfterParsing != 1 || (@$argv && $argv->[0] =~ /^\-+h/)) 
     89    { 
     90    &PrintUsage::print_txt_usage($options, "{import.params}"); 
     91    die "\n"; 
     92    } 
     93 
     94    $self->{'close_out'} = 0; 
     95    my $out = $self->{'out'}; 
     96    if ($out !~ /^(STDERR|STDOUT)$/i) { 
     97    open (OUT, ">$out") || 
     98        (&gsprintf(STDERR, "{common.cannot_open_output_file}: $!\n", $out) && die); 
     99    $out = 'import::OUT'; 
     100    $self->{'close_out'} = 1; 
     101    } 
     102    $out->autoflush(1); 
     103    $self->{'out'} = $out; 
     104 
     105    # @ARGV should be only one item, the name of the collection 
     106    $self->{'collection'} = shift @$argv; 
     107 
     108    return bless $self, $class; 
     109} 
     110 
     111sub get_collection 
     112{ 
     113    my $self = shift @_; 
     114     
     115    return $self->{'collection'}; 
     116} 
     117 
     118 
     119sub read_collection_cfg 
     120{ 
     121    my $self = shift @_; 
     122    my ($collection,$options) = @_; 
     123 
     124    my $collectdir = $self->{'collectdir'}; 
     125    my $site       = $self->{'site'}; 
     126    my $out        = $self->{'out'}; 
     127 
     128    if (($collection = &colcfg::use_collection($site, $collection, $collectdir)) eq "") { 
     129    &PrintUsage::print_txt_usage($options, "{import.params}"); 
     130    die "\n"; 
     131    } 
     132 
     133    # add collection's perllib dir  into include path in  
     134    # case we have collection specific modules 
     135    unshift (@INC, "$ENV{'GSDLCOLLECTDIR'}/perllib"); 
     136 
     137    # check that we can open the faillog 
     138    my $faillog = $self->{'faillog'}; 
     139    if ($faillog eq "") { 
     140    $faillog = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "etc", "fail.log"); 
     141    } 
     142    open (FAILLOG, ">$faillog") || 
     143    (&gsprintf(STDERR, "{import.cannot_open_fail_log}\n", $faillog) && die); 
     144 
     145     
     146    my $faillogname = $faillog; 
     147    $faillog = 'inexport::FAILLOG'; 
     148    $faillog->autoflush(1); 
     149    $self->{'faillog'} = $faillog; 
     150    $self->{'faillogname'} = $faillogname; 
     151 
     152    # Read in the collection configuration file. 
     153    my ($configfilename, $gs_mode) = &colcfg::get_collect_cfg_name($out); 
     154    my $collectcfg = &colcfg::read_collection_cfg ($configfilename, $gs_mode); 
     155 
     156    return $collectcfg; 
     157} 
     158 
     159sub set_collection_options 
     160{ 
     161    my $self = shift @_; 
     162    my ($inexport_mode,$collectcfg) = @_; 
     163 
     164    my $verbosity  = $self->{'verbosity'}; 
     165    print STDERR "**** verbosity = $verbosity\n\n\n"; 
     166 
     167    my $debug      = $self->{'debug'}; 
     168    my $importdir  = $self->{'importdir'}; 
     169    my $archivedir = $self->{'archivedir'}; 
     170    my $out        = $self->{'out'}; 
     171 
     172    # If the infodbtype value wasn't defined in the collect.cfg file, use the default 
     173    if (!defined($collectcfg->{'infodbtype'})) 
     174    { 
     175      $collectcfg->{'infodbtype'} = &dbutil::get_default_infodb_type(); 
     176    } 
     177 
     178    if (defined $collectcfg->{'importdir'} && $importdir eq "") { 
     179    $importdir = $collectcfg->{'importdir'}; 
     180    } 
     181    if (defined $collectcfg->{'archivedir'} && $archivedir eq "") { 
     182    $archivedir = $collectcfg->{'archivedir'}; 
     183    } 
     184    # fill in the default import and archives directories if none 
     185    # were supplied, turn all \ into / and remove trailing / 
     186    $importdir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "import") if $importdir eq ""; 
     187    $importdir =~ s/[\\\/]+/\//g; 
     188    $importdir =~ s/\/$//; 
     189    if (!-e $importdir) { 
     190    &gsprintf($out, "{import.no_import_dir}\n\n", $importdir); 
     191    die "\n"; 
     192    } 
     193    $self->{'importdir'} = $importdir; 
     194 
     195    if ($archivedir eq "") { 
     196    if ($inexport_mode eq "import") { 
     197        $archivedir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "archives"); 
     198    } 
     199    elsif ($inexport_mode eq "export") { 
     200        $archivedir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "export"); 
     201    } 
     202    else { 
     203        print STDERR "Warning: Unrecognized import/export mode '$inexport_mode'\n"; 
     204        print STDERR "         Defaulting to 'archives' for file output\n"; 
     205        $archivedir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "archives"); 
     206    } 
     207    } 
     208 
     209    $archivedir =~ s/[\\\/]+/\//g; 
     210    $archivedir =~ s/\/$//; 
     211    $self->{'archivedir'} = $archivedir; 
     212 
     213    if ($verbosity !~ /\d+/) { 
     214    if (defined $collectcfg->{'verbosity'} && $collectcfg->{'verbosity'} =~ /\d+/) { 
     215        $verbosity = $collectcfg->{'verbosity'}; 
     216    } else { 
     217        $verbosity = 2; # the default 
     218    } 
     219    } 
     220    if (defined $collectcfg->{'manifest'} && $self->{'manifest'} eq "") { 
     221    $self->{'manifest'} = $collectcfg->{'manifest'}; 
     222    } 
     223 
     224    if (defined $collectcfg->{'gzip'} && !$self->{'gzip'}) { 
     225    if ($collectcfg->{'gzip'} =~ /^true$/i) { 
     226        $self->{'gzip'} = 1; 
     227    } 
     228    } 
     229 
     230    if ($self->{'maxdocs'} !~ /\-?\d+/) { 
     231    if (defined $collectcfg->{'maxdocs'} && $collectcfg->{'maxdocs'} =~ /\-?\d+/) { 
     232        $self->{'maxdocs'} = $collectcfg->{'maxdocs'}; 
     233    } else { 
     234        $self->{'maxdocs'} = -1; # the default 
     235    } 
     236    } 
     237    if ($self->{'groupsize'} == 1) { 
     238    if (defined $collectcfg->{'groupsize'} && $collectcfg->{'groupsize'} =~ /\d+/) { 
     239        $self->{'groupsize'} = $collectcfg->{'groupsize'}; 
     240    } 
     241    } 
     242 
     243    if (!defined $self->{'OIDtype'}  
     244    || ($self->{'OIDtype'} !~ /^(hash|incremental|assigned|dirname)$/ )) { 
     245    if (defined $collectcfg->{'OIDtype'}  
     246        && $collectcfg->{'OIDtype'} =~ /^(hash|incremental|assigned|dirname)$/) { 
     247        $self->{'OIDtype'} = $collectcfg->{'OIDtype'}; 
     248    } else { 
     249        $self->{'OIDtype'} = "hash"; # the default 
     250    } 
     251    } 
     252 
     253    if ((!defined $self->{'OIDmetadata'}) || ($self->{'OIDmetadata'} eq "")) { 
     254    if (defined $collectcfg->{'OIDmetadata'}) { 
     255        $self->{'OIDmetadata'} = $collectcfg->{'OIDmetadata'}; 
     256    } else { 
     257        $self->{'OIDmetadata'} = "dc.Identifier"; # the default 
     258    } 
     259    } 
     260 
     261    my $sortmeta = $self->{'sortmeta'}; 
     262    if (defined $collectcfg->{'sortmeta'} && (!defined $sortmeta || $sortmeta eq "")) { 
     263    $sortmeta = $collectcfg->{'sortmeta'}; 
     264    } 
     265    # sortmeta cannot be used with group size 
     266    $sortmeta = undef unless defined $sortmeta && $sortmeta =~ /\S/; 
     267    if (defined $sortmeta && $self->{'groupsize'} > 1) { 
     268    &gsprintf($out, "{import.cannot_sort}\n\n"); 
     269    $sortmeta = undef; 
     270    } 
     271    $self->{'sortmeta'} = $sortmeta; 
     272 
     273    if (defined $collectcfg->{'removeprefix'} && $self->{'removeprefix'} eq "") { 
     274    $self->{'removeprefix'} = $collectcfg->{'removeprefix'}; 
     275    } 
     276     
     277    if (defined $collectcfg->{'removesuffix'} && $self->{'removesuffix'} eq "") { 
     278    $self->{'removesuffix'} = $collectcfg->{'removesuffix'}; 
     279    } 
     280    if (defined $collectcfg->{'debug'} && $collectcfg->{'debug'} =~ /^true$/i) { 
     281    $self->{'debug'} = 1; 
     282    } 
     283    if (defined $collectcfg->{'gli'} && $collectcfg->{'gli'} =~ /^true$/i) { 
     284    $self->{'gli'} = 1; 
     285    } 
     286    $self->{'gli'} = 0 unless defined $self->{'gli'}; 
     287        
     288    # check keepold and removeold 
     289    my ($removeold, $keepold, $incremental, $incremental_mode)  
     290    = &scriptutil::check_removeold_and_keepold($self->{'removeold'}, $self->{'keepold'},  
     291                           $self->{'incremental'}, "archives",  
     292                           $collectcfg); 
     293 
     294    $self->{'removeold'}        = $removeold; 
     295    $self->{'keepold'}          = $keepold; 
     296    $self->{'incremental'}      = $incremental; 
     297    $self->{'incremental_mode'} = $incremental_mode; 
     298} 
     299 
     300sub process_files 
     301{ 
     302    my $self = shift @_; 
     303    my ($inexport_mode,$collectcfg) = @_; 
     304 
     305    my $verbosity   = $self->{'verbosity'}; 
     306    my $debug       = $self->{'debug'}; 
     307 
     308    my $importdir   = $self->{'importdir'}; 
     309    my $archivedir  = $self->{'archivedir'}; 
     310 
     311    my $incremental = $self->{'incremental'}; 
     312    my $incremental_mode = $self->{'incremental_mode'}; 
     313 
     314    my $removeold   = $self->{'removeold'}; 
     315    my $keepold     = $self->{'keepold'}; 
     316 
     317    my $saveas      = $self->{'saveas'}; 
     318    my $OIDtype     = $self->{'OIDtype'}; 
     319    my $OIDmetadata = $self->{'OIDmetadata'}; 
     320 
     321    my $out         = $self->{'out'}; 
     322    my $faillog     = $self->{'faillog'}; 
     323 
     324    my $maxdocs     = $self->{'maxdocs'}; 
     325    my $gzip        = $self->{'gzip'}; 
     326    my $groupsize   = $self->{'groupsize'}; 
     327    my $sortmeta    = $self->{'sortmeta'}; 
     328 
     329    my $removeprefix = $self->{'removeprefix'}; 
     330    my $removesuffix = $self->{'removesuffix'}; 
     331 
     332    my $gli         = $self->{'gli'}; 
     333 
     334    print STDERR "<Import>\n" if $gli; 
     335     
     336    my $manifest_lookup = new manifest($collectcfg->{'infodbtype'},$archivedir); 
     337    if ($self->{'manifest'} ne "") {     
     338    my $manifest_filename = $self->{'manifest'}; 
     339 
     340    if (!&util::filename_is_absolute($manifest_filename)) { 
     341        $manifest_filename = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, $manifest_filename); 
     342    } 
     343 
     344    $self->{'manifest'} =~ s/[\\\/]+/\//g; 
     345    $self->{'manifest'} =~ s/\/$//; 
     346 
     347    $manifest_lookup->parse($manifest_filename); 
     348    } 
     349 
     350    my $manifest = $self->{'manifest'}; 
     351 
     352    # load all the plugins 
     353    my $plugins = []; 
     354    if (defined $collectcfg->{'plugin'}) { 
     355    $plugins = $collectcfg->{'plugin'}; 
     356    } 
     357 
     358    #some global options for the plugins 
     359    my @global_opts = (); 
     360 
     361 
     362    my $pluginfo = &plugin::load_plugins ($plugins, $verbosity, $out, $faillog, \@global_opts, $incremental_mode); 
     363    if (scalar(@$pluginfo) == 0) { 
     364    &gsprintf($out, "{import.no_plugins_loaded}\n"); 
     365    die "\n"; 
     366    } 
     367 
     368    # remove the old contents of the archives directory (and tmp directory) if needed 
     369    if ($removeold) { 
     370    if (-e $archivedir) { 
     371        &gsprintf($out, "{import.removing_archives}\n"); 
     372        &util::rm_r ($archivedir); 
     373    } 
     374    my $tmpdir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "tmp"); 
     375    $tmpdir =~ s/[\\\/]+/\//g; 
     376    $tmpdir =~ s/\/$//; 
     377    if (-e $tmpdir) { 
     378        &gsprintf($out, "{import.removing_tmpdir}\n"); 
     379        &util::rm_r ($tmpdir); 
     380    } 
     381    } 
     382 
     383    # create the archives dir if needed 
     384    &util::mk_all_dir($archivedir); 
     385 
     386    # read the archive information file 
     387##  my $arcinfo_doc_filename = &util::filename_cat ($archivedir, "archives.inf"); 
     388 
     389    # BACKWARDS COMPATIBILITY: Just in case there are old .ldb/.bdb files (won't do anything for other infodbtypes) 
     390    &util::rename_ldb_or_bdb_file(&util::filename_cat($archivedir, "archiveinf-doc")); 
     391    &util::rename_ldb_or_bdb_file(&util::filename_cat($archivedir, "archiveinf-src")); 
     392 
     393    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($collectcfg->{'infodbtype'}, "archiveinf-doc", $archivedir); 
     394    my $arcinfo_src_filename = &dbutil::get_infodb_file_path($collectcfg->{'infodbtype'}, "archiveinf-src", $archivedir); 
     395                             
     396    my $archive_info = new arcinfo ($collectcfg->{'infodbtype'}); 
     397    $archive_info->load_info ($arcinfo_doc_filename); 
     398 
     399    if ($manifest eq "") { 
     400    # Load in list of files in import folder from last import (if present) 
     401    $archive_info->load_prev_import_filelist ($arcinfo_src_filename); 
     402    } 
     403 
     404    ####Use Plugout#### 
     405    my ($plugout);  
     406    if (defined $collectcfg->{'plugout'}) { 
     407    # If a plugout was specified in the collect.cfg file, assume it is sensible 
     408    # We can't check the name because it could be anything, if it is a custom plugout 
     409    $plugout = $collectcfg->{'plugout'}; 
     410    } 
     411    else{ 
     412    if ($saveas !~ /^(GreenstoneXML|GreenstoneMETS)$/) { 
     413        push @$plugout,"GreenstoneXMLPlugout"; 
     414    } 
     415    else{ 
     416        push @$plugout,$saveas."Plugout"; 
     417    } 
     418    } 
     419 
     420    push @$plugout,("-output_info",$archive_info) if (defined $archive_info);  
     421    push @$plugout,("-verbosity",$verbosity)      if (defined $verbosity); 
     422    push @$plugout,("-gzip_output")               if ($gzip); 
     423    push @$plugout,("-group_size",$groupsize)     if (defined $groupsize); 
     424    push @$plugout,("-output_handle",$out)        if (defined); 
     425    push @$plugout,("-debug")                     if ($debug); 
     426     
     427    my $processor = &plugout::load_plugout($plugout);                         
     428    $processor->setoutputdir ($archivedir); 
     429    $processor->set_sortmeta ($sortmeta, $removeprefix, $removesuffix) if defined $sortmeta; 
     430    $processor->set_OIDtype ($OIDtype, $OIDmetadata); 
     431     
     432    &plugin::begin($pluginfo, $importdir, $processor, $maxdocs, $gli); 
     433     
     434    if ($removeold) { 
     435        # occasionally, plugins may want to do something on remove old, eg pharos image indexing 
     436    &plugin::remove_all($pluginfo, $importdir, $processor, $maxdocs, $gli); 
     437    } 
     438    if ($manifest eq "") { 
     439    # process the import directory 
     440    my $block_hash = {}; 
     441    my $metadata = {}; 
     442    # gobal blocking pass may set up some metadata 
     443    &plugin::file_block_read($pluginfo, $importdir, "", $block_hash, $metadata, $gli); 
     444 
     445 
     446    if ($incremental || $incremental_mode eq "onlyadd") { 
     447 
     448        prime_doc_oid_count($archivedir); 
     449 
     450 
     451        # Can now work out which files were new, already existed, and have 
     452        # been deleted 
     453         
     454        new_vs_old_import_diff($archive_info,$block_hash,$importdir, 
     455                   $archivedir,$verbosity,$incremental_mode); 
     456         
     457        my @new_files = sort keys %{$block_hash->{'new_files'}}; 
     458        if (scalar(@new_files>0)) { 
     459        print STDERR "New files and modified metadata files since last import:\n  "; 
     460        print STDERR join("\n  ",@new_files), "\n"; 
     461        } 
     462 
     463        if ($incremental) { 
     464               # only look for deletions if we are truely incremental 
     465        my @deleted_files = sort keys %{$block_hash->{'deleted_files'}}; 
     466        # Filter out any in gsdl/tmp area 
     467        my @filtered_deleted_files = (); 
     468        my $gsdl_tmp_area = &util::filename_cat($ENV{'GSDLHOME'}, "tmp"); 
     469        my $collect_tmp_area = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "tmp"); 
     470        $gsdl_tmp_area = &util::filename_to_regex($gsdl_tmp_area); 
     471        $collect_tmp_area = &util::filename_to_regex($collect_tmp_area); 
     472                   
     473        foreach my $df (@deleted_files) { 
     474            next if ($df =~ m/^$gsdl_tmp_area/); 
     475            next if ($df =~ m/^$collect_tmp_area/); 
     476             
     477            push(@filtered_deleted_files,$df); 
     478        }        
     479         
     480 
     481        @deleted_files = @filtered_deleted_files; 
     482         
     483        if (scalar(@deleted_files)>0) { 
     484            print STDERR "Files deleted since last import:\n  "; 
     485            print STDERR join("\n  ",@deleted_files), "\n"; 
     486         
     487         
     488            &plugin::remove_some($pluginfo, $collectcfg->{'infodbtype'}, $archivedir, \@deleted_files); 
     489             
     490            mark_docs_for_deletion($archive_info,$block_hash,\@deleted_files, $archivedir,$verbosity, "delete"); 
     491        } 
     492         
     493        my @reindex_files = sort keys %{$block_hash->{'reindex_files'}}; 
     494         
     495        if (scalar(@reindex_files)>0) { 
     496            print STDERR "Files to reindex since last import:\n  "; 
     497            print STDERR join("\n  ",@reindex_files), "\n"; 
     498            &plugin::remove_some($pluginfo, $collectcfg->{'infodbtype'}, $archivedir, \@reindex_files); 
     499            mark_docs_for_deletion($archive_info,$block_hash,\@reindex_files, $archivedir,$verbosity, "reindex"); 
     500        } 
     501                 
     502        } 
     503         
     504        # Play it safe, and run through the entire folder, only processing new or edited files 
     505        &plugin::read ($pluginfo, $importdir, "", $block_hash, $metadata, $processor, $maxdocs, 0, $gli); 
     506 
     507    } 
     508    else { 
     509        &plugin::read ($pluginfo, $importdir, "", $block_hash, $metadata, $processor, $maxdocs, 0, $gli); 
     510    } 
     511 
     512    } 
     513    else 
     514    { 
     515    #  
     516    # 1. Process delete files first 
     517    #  
     518 
     519    my @deleted_files = keys %{$manifest_lookup->{'delete'}}; 
     520    my @full_deleted_files = (); 
     521 
     522    # ensure all filenames are absolute 
     523    foreach my $df (@deleted_files) {        
     524        my $full_df = 
     525        (&util::filename_is_absolute($df))  
     526        ? $df 
     527        : &util::filename_cat($importdir,$df); 
     528 
     529        push(@full_deleted_files,$full_df); 
     530    } 
     531     
     532    &plugin::remove_some($pluginfo, $collectcfg->{'infodbtype'}, $archivedir, \@full_deleted_files); 
     533    mark_docs_for_deletion($archive_info,{}, 
     534                      \@full_deleted_files, 
     535                      $archivedir, $verbosity, "delete"); 
     536 
     537 
     538    #  
     539    # 2. Now files for reindexing 
     540    #  
     541 
     542    my @reindex_files = keys %{$manifest_lookup->{'reindex'}}; 
     543    my @full_reindex_files = (); 
     544 
     545    # ensure all filenames are absolute 
     546    foreach my $rf (@reindex_files) {        
     547        my $full_rf = 
     548        (&util::filename_is_absolute($rf))  
     549        ? $rf 
     550        : &util::filename_cat($importdir,$rf); 
     551 
     552        push(@full_reindex_files,$full_rf); 
     553    } 
     554     
     555    &plugin::remove_some($pluginfo, $collectcfg->{'infodbtype'}, $archivedir, \@full_reindex_files); 
     556    mark_docs_for_deletion($archive_info,{},\@full_reindex_files, $archivedir,$verbosity, "reindex"); 
     557 
     558    # And now ensure the new version of the file processed by appropriate 
     559    # plugin 
     560    foreach my $full_rf (@full_reindex_files) { 
     561        &plugin::read ($pluginfo, "", $full_rf, {}, {}, $processor, $maxdocs, 0, $gli); 
     562    } 
     563 
     564 
     565    #  
     566    # 3. Now finally any new files 
     567    #  
     568 
     569    foreach my $file (keys %{$manifest_lookup->{'index'}}) { 
     570        &plugin::read ($pluginfo, $importdir, $file, {}, {}, $processor, $maxdocs, 0, $gli); 
     571    } 
     572 
     573 
     574    } 
     575 
     576    &plugin::end($pluginfo, $processor); 
     577 
     578    &plugin::deinit($pluginfo, $processor); 
     579 
     580    # Store the value of OIDCount (used in doc.pm) so it can be 
     581    # restored correctly to this value on an incremental build 
     582    store_doc_oid_count($archivedir); 
     583 
     584    # write out the archive information file 
     585    $processor->close_file_output() if $groupsize > 1; 
     586    $processor->close_group_output() if $processor->is_group(); 
     587 
     588# The following 'if' statement is in the export.pl version of the script, 
     589# The reason for the 'if' statement is now given in export.pl 
     590# Unclear at this point if the same should be done here 
     591##    if (($saveas =~ m/^.*METS$/) || ($saveas eq "MARC")) { 
     592    # Not all export types need this (e.g. DSpace) 
     593 
     594    # should we still do this in debug mode?? 
     595 
     596    # for backwards compatability with archvies.inf file 
     597    if ($arcinfo_doc_filename =~ m/(contents)|(\.inf)$/) { 
     598    $archive_info->save_info($arcinfo_doc_filename); 
     599    } 
     600    else { 
     601    $archive_info->save_revinfo_db($arcinfo_src_filename); 
     602    } 
     603 
     604 
     605##    } 
     606 
     607    return $pluginfo; 
     608} 
     609 
     610 
     611sub generate_statistics 
     612{ 
     613    my $self = shift @_; 
     614    my ($inexport_mode,$pluginfo) = @_; 
     615 
     616    my $statsfile = $self->{'statsfile'}; 
     617    my $out       = $self->{'out'}; 
     618    my $faillogname = $self->{'faillogname'}; 
     619    my $gli       = $self->{'gli'}; 
     620 
     621    # write out import stats 
     622    my $close_stats = 0; 
     623    if ($statsfile !~ /^(STDERR|STDOUT)$/i) { 
     624    if (open (STATS, ">$statsfile")) { 
     625        $statsfile = 'import::STATS'; 
     626        $close_stats = 1; 
     627    } else { 
     628        &gsprintf($out, "{import.cannot_open_stats_file}", $statsfile); 
     629        &gsprintf($out, "{import.stats_backup}\n"); 
     630        $statsfile = 'STDERR'; 
     631    } 
     632    } 
     633 
     634    &gsprintf($out, "\n"); 
     635    &gsprintf($out, "*********************************************\n"); 
     636    &gsprintf($out, "{import.complete}\n"); 
     637    &gsprintf($out, "*********************************************\n"); 
     638 
     639    &plugin::write_stats($pluginfo, $statsfile, $faillogname, $gli); 
     640    if ($close_stats) { 
     641    close STATS; 
     642    } 
     643 
     644    close OUT if $self->{'close_out'}; 
     645    close FAILLOG; 
     646} 
     647 
     648 
     649 
     650 
     651 
    34652 
    35653