Changeset 22421

Show
Ignore:
Timestamp:
18.07.2010 16:36:56 (9 years ago)
Author:
davidb
Message:

Continued work on refactoring code to have better shared support for import.pl and export.pl

Location:
main/trunk/greenstone2
Files:
3 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/bin/script/export.pl

    r22331 r22421  
    6666 
    6767use strict; 
    68 no strict 'refs'; # allow filehandles to be variables and vice versa 
    69 no strict 'subs'; # allow barewords (eg STDERR) as function arguments 
    70  
    71 use arcinfo; 
    72 use colcfg; 
    73 use dbutil; 
    74 use plugin; 
    75 use plugout; 
    76 use manifest; 
     68#no strict 'refs'; # allow filehandles to be variables and vice versa 
     69#no strict 'subs'; # allow barewords (eg STDERR) as function arguments 
    7770use inexport; 
    78 use util; 
    79 use scriptutil; 
    80 use FileHandle; 
    81 use gsprintf 'gsprintf'; 
    82 use printusage; 
    83 use parse2; 
    84  
    8571 
    8672my $oidtype_list =  
     
    128114    [  
    129115      $saveas_argument, 
    130       { 'name' => "exportdir", 
    131     'desc' => "{export.exportdir}", 
     116      { 'name' => "archivedir", 
     117    'desc' => "{export.archivedir}", 
    132118    'type' => "string", 
    133119    'reqd' => "no", 
     
    285271 
    286272 
     273 
     274sub main  
     275{ 
     276    my $inexport = new inexport("export",\@ARGV,$options,$listall_options); 
     277     
     278    my $collection = $inexport->get_collection(); 
     279    my ($config_filename,$collect_cfg) = $inexport->read_collection_cfg($collection,$options);     
     280    $inexport->set_collection_options($collect_cfg); 
     281     
     282    my $pluginfo = $inexport->process_files($config_filename,$collect_cfg); 
     283 
     284    $inexport->generate_statistics($pluginfo); 
     285} 
     286 
     287 
    287288&main(); 
    288289 
    289 sub main { 
    290     # params 
    291     my ($language, $verbosity, $debug, 
    292     $collectdir, $importdir, $exportdir, $site, $manifest,  
    293     $incremental, $incremental_mode, $keepold, $removeold, 
    294     $saveas, 
    295     $OIDtype, $OIDmetadata, 
    296     $maxdocs, $statsfile,  
    297     $gzip, 
    298     $out, $faillog, $gli, $listall,  
    299     # plugout specific ones 
    300     $mapping_file, $xsltfile,  
    301     $xslt_mets, $xslt_txt, $fedora_namespace, $group_marc); 
    302  
    303     my $xml = 0; 
    304      
    305     # other vars 
    306     my ($configfilename, $collection, $collectcfg, 
    307     $expinfo_doc_filename, $expinfo_src_filename, $export_info,  
    308     $gs_mode,  
    309     $processor, $pluginfo); 
    310  
    311     my $service = "export"; 
    312  
    313     my $hashParsingResult = {}; 
    314     # general options available to all plugins 
    315     my $intArgLeftinAfterParsing = parse2::parse(\@ARGV,$arguments,$hashParsingResult,"allow_extra_options"); 
    316      
    317     # If parse returns -1 then something has gone wrong 
    318     if ($intArgLeftinAfterParsing == -1) 
    319     { 
    320     &PrintUsage::print_txt_usage($options, "{export.params}"); 
    321     die "\n"; 
    322     } 
    323  
    324     foreach my $strVariable (keys %$hashParsingResult) 
    325     { 
    326     eval "\$$strVariable = \$hashParsingResult->{\"\$strVariable\"}"; 
    327     } 
    328  
    329     
    330     # If $language has been specified, load the appropriate resource bundle 
    331     # (Otherwise, the default resource bundle will be loaded automatically) 
    332     if ($language && $language =~ /\S/) { 
    333     &gsprintf::load_language_specific_resource_bundle($language); 
    334     } 
    335  
    336     if ($listall) { 
    337     if ($xml) { 
    338         &PrintUsage::print_xml_usage($listall_options); 
    339     } 
    340     else 
    341     { 
    342         &PrintUsage::print_txt_usage($listall_options,"{export.params}"); 
    343     } 
    344     die "\n"; 
    345     } 
    346      
    347     if ($xml) { 
    348         &PrintUsage::print_xml_usage($options); 
    349     die "\n"; 
    350     } 
    351  
    352     if ($gli) { # the gli wants strings to be in UTF-8 
    353     &gsprintf::output_strings_in_UTF8;  
    354     } 
    355  
    356     # now check that we had exactly one leftover arg, which should be  
    357     # the collection name. We don't want to do this earlier, cos  
    358     # -xml arg doesn't need a collection name 
    359     # Or if the user specified -h, then we output the usage also 
    360     if ($intArgLeftinAfterParsing != 1 || (@ARGV && $ARGV[0] =~ /^\-+h/)) 
    361     { 
    362     &PrintUsage::print_txt_usage($options, "{export.params}"); 
    363     die "\n"; 
    364     } 
    365  
    366     my $close_out = 0; 
    367     if ($out !~ /^(STDERR|STDOUT)$/i) { 
    368     open (OUT, ">$out") || 
    369         (&gsprintf(STDERR, "{common.cannot_open_output_file}\n", $out) && die); 
    370     $out = 'export::OUT'; 
    371     $close_out = 1; 
    372     } 
    373     $out->autoflush(1); 
    374  
    375     # get and check the collection name 
    376     if (($collection = &colcfg::use_collection($site, @ARGV, $collectdir)) eq "") { 
    377     &PrintUsage::print_txt_usage($options, "{export.params}"); 
    378     die "\n"; 
    379     } 
    380     # add collection's perllib dir  into include path in  
    381     # case we have collection specific modules 
    382     unshift (@INC, "$ENV{'GSDLCOLLECTDIR'}/perllib"); 
    383      
    384     # check that we can open the faillog 
    385     if ($faillog eq "") { 
    386     $faillog = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "etc", "fail.log"); 
    387     } 
    388     open (FAILLOG, ">$faillog") || 
    389     (&gsprintf(STDERR, "{export.cannot_open_fail_log}\n", $faillog) && die); 
    390     my $faillogname = $faillog; 
    391     $faillog = 'export::FAILLOG'; 
    392     $faillog->autoflush(1); 
    393          
    394     # Read in the collection configuration file. 
    395     ($configfilename, $gs_mode) = &colcfg::get_collect_cfg_name($out); 
    396     $collectcfg = &colcfg::read_collection_cfg ($configfilename, $gs_mode); 
    397      
    398     # If the infodbtype value wasn't defined in the collect.cfg file, use the default 
    399     if (!defined($collectcfg->{'infodbtype'})) 
    400     { 
    401       $collectcfg->{'infodbtype'} = &dbutil::get_default_infodb_type(); 
    402     } 
    403      
    404     if (defined $collectcfg->{'importdir'} && $importdir eq "") { 
    405     $importdir = $collectcfg->{'importdir'}; 
    406     } 
    407     if (defined $collectcfg->{'exportdir'} && $exportdir eq "") { 
    408     $exportdir = $collectcfg->{'exportdir'}; 
    409     } 
    410  
    411     # fill in the default import and export directories if none 
    412     # were supplied, turn all \ into / and remove trailing / 
    413     $importdir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "import") if $importdir eq ""; 
    414     $importdir =~ s/[\\\/]+/\//g; 
    415     $importdir =~ s/\/$//; 
    416     if (!-e $importdir) { 
    417     &gsprintf($out, "{import.no_import_dir}\n\n", $importdir); 
    418     die "\n"; 
    419     } 
    420  
    421     $exportdir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "export") if $exportdir eq ""; 
    422     $exportdir =~ s/[\\\/]+/\//g; 
    423     $exportdir =~ s/\/$//; 
    424      
    425     my $plugins = []; 
    426     if (defined $collectcfg->{'plugin'}) { 
    427     $plugins = $collectcfg->{'plugin'}; 
    428     } 
    429     # some global options for the plugins            
    430     my @global_opts = (); 
    431  
    432     if ($verbosity !~ /\d+/) { 
    433     if (defined $collectcfg->{'verbosity'} && $collectcfg->{'verbosity'} =~ /\d+/) { 
    434         $verbosity = $collectcfg->{'verbosity'}; 
    435     } else { 
    436         $verbosity = 2; # the default 
    437     } 
    438     } 
    439      
    440     if (defined $collectcfg->{'manifest'} && $manifest eq "") { 
    441     $manifest = $collectcfg->{'manifest'}; 
    442     } 
    443     if (defined $collectcfg->{'gzip'} && !$gzip) { 
    444     if ($collectcfg->{'gzip'} =~ /^true$/i) { 
    445         $gzip = 1; 
    446     } 
    447     } 
    448     if ($maxdocs !~ /\-?\d+/) { 
    449     if (defined $collectcfg->{'maxdocs'} && $collectcfg->{'maxdocs'} =~ /\-?\d+/) { 
    450         $maxdocs = $collectcfg->{'maxdocs'}; 
    451     } else { 
    452         $maxdocs = -1; # the default 
    453     } 
    454     } 
    455      
    456     # groupsize is in import - does it make sense here?? 
    457  
    458     if (!defined $OIDtype || ($OIDtype !~ /^(hash|incremental|assigned|dirname)$/)) { 
    459     if (defined $collectcfg->{'OIDtype'} && $collectcfg->{'OIDtype'} =~ /^(hash|incremental|assigned|dirname)$/) { 
    460         $OIDtype = $collectcfg->{'OIDtype'}; 
    461     } else { 
    462         $OIDtype = "hash"; # the default 
    463     } 
    464     } 
    465  
    466     if ((!defined $OIDmetadata) || ($OIDmetadata eq "")) { 
    467     if (defined $collectcfg->{'OIDmetadata'}) { 
    468         $OIDmetadata = $collectcfg->{'OIDmetadata'}; 
    469     } else { 
    470         $OIDmetadata = "dc.Identifier"; # the default 
    471     } 
    472     } 
    473  
    474     if (defined $collectcfg->{'debug'} && $collectcfg->{'debug'} =~ /^true$/i) { 
    475     $debug = 1; 
    476     } 
    477     if (defined $collectcfg->{'gli'} && $collectcfg->{'gli'} =~ /^true$/i) { 
    478     $gli = 1; 
    479     } 
    480     $gli = 0 unless defined $gli; 
    481  
    482     # check keepold and removeold 
    483     ($removeold, $keepold, $incremental, $incremental_mode)  
    484     = &scriptutil::check_removeold_and_keepold($removeold, $keepold,  
    485                            $incremental, "export",  
    486                            $collectcfg); 
    487  
    488     print STDERR "<export>\n" if $gli; 
    489      
    490     my $manifest_lookup = new manifest($collectcfg->{'infodbtype'},$exportdir); 
    491     if ($manifest ne "") {   
    492     my $manifest_filename = $manifest; 
    493  
    494     if ($manifest_filename !~ m/^[\\\/]/) { 
    495         $manifest_filename = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, $manifest_filename); 
    496     } 
    497  
    498     $manifest =~ s/[\\\/]+/\//g; 
    499     $manifest =~ s/\/$//; 
    500  
    501     $manifest_lookup->parse($manifest_filename); 
    502     } 
    503      
    504     # load all the plugins 
    505     $pluginfo = &plugin::load_plugins ($plugins, $verbosity, $out, $faillog, \@global_opts, $incremental_mode); 
    506          
    507     if (scalar(@$pluginfo) == 0) { 
    508     &gsprintf($out, "{import.no_plugins_loaded}\n"); 
    509     die "\n"; 
    510     } 
    511      
    512     # remove the old contents of the export directory if needed 
    513     if ($removeold) { 
    514     if (-e $exportdir) { 
    515         &gsprintf($out, "{export.removing_export}\n"); 
    516         &util::rm_r ($exportdir); 
    517     } 
    518     my $tmpdir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "tmp"); 
    519     $tmpdir =~ s/[\\\/]+/\//g; 
    520     $tmpdir =~ s/\/$//; 
    521     if (-e $tmpdir) { 
    522         &gsprintf($out, "{import.removing_tmpdir}\n"); 
    523         &util::rm_r ($tmpdir); 
    524     } 
    525     } 
    526      
    527     # create the export dir if needed 
    528     &util::mk_all_dir($exportdir); 
    529      
    530     # read the export information file 
    531      
    532     # the plugouts should be doing this!! 
    533 ##  $expinfo_doc_filename = &util::filename_cat ($exportdir, "export.inf"); 
    534  
    535     # BACKWARDS COMPATIBILITY: Just in case there are old .ldb/.bdb files (won't do anything for other infodbtypes) 
    536     &util::rename_ldb_or_bdb_file(&util::filename_cat($exportdir, "archiveinf-doc")); 
    537     &util::rename_ldb_or_bdb_file(&util::filename_cat($exportdir, "archiveinf-src")); 
    538  
    539     $expinfo_doc_filename = &dbutil::get_infodb_file_path($collectcfg->{'infodbtype'}, "archiveinf-doc", $exportdir); 
    540     $expinfo_src_filename = &dbutil::get_infodb_file_path($collectcfg->{'infodbtype'}, "archiveinf-src", $exportdir); 
    541          
    542     $export_info = new arcinfo($collectcfg->{'infodbtype'}); 
    543     $export_info -> load_info ($expinfo_doc_filename);   
    544          
    545     if ($manifest eq "") { 
    546     # Load in list of files in export folder from last export (if present) 
    547     $export_info->load_prev_import_filelist ($expinfo_src_filename); 
    548     } 
    549      
    550     my ($plugout);  
    551     if (defined $collectcfg->{'plugout'} && $collectcfg->{'plugout'} =~ /^(.*METS|DSpace|MARCXML)Plugout/) { 
    552     $plugout = $collectcfg->{'plugout'}; 
    553     } 
    554     else{ 
    555     if ($saveas !~ /^(GreenstoneMETS|FedoraMETS|DSpace|MARCXML)$/) { 
    556         push @$plugout,"GreenstoneMETSPlugout"; 
    557     } 
    558     else{ 
    559         push @$plugout,$saveas."Plugout"; 
    560     } 
    561     } 
    562      
    563     my $plugout_name = $plugout->[0]; 
    564          
    565     push @$plugout,("-output_info",$export_info) if (defined $export_info);  
    566     push @$plugout,("-verbosity",$verbosity) if (defined $verbosity); 
    567     push @$plugout,("-debug") if ($debug); 
    568     push @$plugout,("-gzip_output") if ($gzip); 
    569     push @$plugout,("-output_handle",$out) if (defined $out); 
    570     push @$plugout,("-xslt_file",$xsltfile) if (defined $xsltfile && $xsltfile ne ""); 
    571     push @$plugout,("-group") if ($group_marc && $plugout_name =~ m/^MARCXMLPlugout$/); 
    572     push @$plugout,("-mapping_file",$mapping_file) if (defined $mapping_file && $mapping_file ne "" && $plugout_name =~ m/^MARCXMLPlugout$/); 
    573     push @$plugout,("-xslt_mets",$xslt_mets) if (defined $xslt_mets && $xslt_mets ne "" && $plugout_name =~ m/^.*METSPlugout$/); 
    574     push @$plugout,("-xslt_txt",$xslt_txt) if (defined $xslt_txt && $xslt_txt ne "" && $plugout_name =~ m/^.*METSPlugout$/); 
    575     push @$plugout,("-fedora_namespace",$fedora_namespace) if (defined $fedora_namespace && $fedora_namespace ne "" && $plugout_name eq "FedoraMETSPlugout"); 
    576      
    577     $processor = &plugout::load_plugout($plugout);     
    578     $processor->setoutputdir ($exportdir); 
    579          
    580     $processor->set_OIDtype ($OIDtype, $OIDmetadata); 
    581          
    582     &plugin::begin($pluginfo, $importdir, $processor, $maxdocs, $gli); 
    583          
    584     if ($manifest eq "") { 
    585     # process the import directory 
    586     my $block_hash = {}; 
    587     my $metadata = {}; 
    588     # gobal blocking pass may set up some metadata 
    589     &plugin::file_block_read($pluginfo, $importdir, "", $block_hash, $metadata, $gli); 
    590     #&plugin::read ($pluginfo, $importdir, "", $block_hash, $metadata, $processor, $maxdocs, 0, $gli); 
    591     ### section below copied from import.pl 
    592     if ($incremental) { 
    593         # equivalent to saying ($keepold && ($incremental_mode eq "all")) 
    594  
    595         &inexport::prime_doc_oid_count($exportdir); 
    596  
    597  
    598         # Can now work out which files were new, already existed, and have 
    599         # been deleted 
    600          
    601         &inexport::new_vs_old_import_diff($export_info,$block_hash,$importdir, 
    602                           $exportdir,$verbosity,$incremental_mode); 
    603          
    604         my @deleted_files = sort keys %{$block_hash->{'deleted_files'}}; 
    605         # Filter out any in gsdl/tmp area 
    606         my @filtered_deleted_files = (); 
    607         my $gsdl_tmp_area = &util::filename_cat($ENV{'GSDLHOME'}, "tmp"); 
    608         my $collect_tmp_area = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "tmp"); 
    609         $gsdl_tmp_area = &util::filename_to_regex($gsdl_tmp_area); 
    610         $collect_tmp_area = &util::filename_to_regex($collect_tmp_area); 
    611  
    612  
    613         foreach my $df (@deleted_files) { 
    614         next if ($df =~ m/^$gsdl_tmp_area/); 
    615         next if ($df =~ m/^$collect_tmp_area/); 
    616          
    617         push(@filtered_deleted_files,$df); 
    618         } 
    619  
    620          
    621         @deleted_files = @filtered_deleted_files; 
    622  
    623         if (scalar(@deleted_files>0)) { 
    624         print STDERR "Files deleted since last import:\n  "; 
    625         print STDERR join("\n  ",@deleted_files), "\n"; 
    626         } 
    627          
    628         my @new_files = sort keys %{$block_hash->{'new_files'}}; 
    629         if (scalar(@new_files>0)) { 
    630         print STDERR "New files since last import:\n  "; 
    631         print STDERR join("\n  ",@new_files), "\n"; 
    632         } 
    633          
    634         &inexport::mark_docs_for_deletion($export_info,$block_hash,\@deleted_files, 
    635                           $exportdir,$verbosity); 
    636  
    637         &inexport::mark_docs_for_reindex($export_info,$block_hash, 
    638                          $exportdir,$verbosity); 
    639  
    640         my @reindex_files = sort keys %{$block_hash->{'reindex_files'}}; 
    641  
    642         if (scalar(@reindex_files>0)) { 
    643         print STDERR "Files to reindex since last import:\n  "; 
    644         print STDERR join("\n  ",@reindex_files), "\n"; 
    645         } 
    646  
    647  
    648         # not sure if the following will work -- will the metadata data-structure be correctly initialized 
    649         # in the right order? 
    650 #       foreach my $file (@new_files, @reindex_files) { 
    651 #       &plugin::read ($pluginfo, $importdir, $file, $block_hash, $metadata, $processor, $maxdocs, 0, $gli); 
    652 #       } 
    653  
    654  
    655         # Play it safe, and run through the entire folder, only processing new or edited files 
    656         &plugin::read ($pluginfo, $importdir, "", $block_hash, $metadata, $processor, $maxdocs, 0, $gli); 
    657  
    658     } 
    659     else { 
    660         &plugin::read ($pluginfo, $importdir, "", $block_hash, $metadata, $processor, $maxdocs, 0, $gli); 
    661     } 
    662  
    663     ### end copy 
    664     }  
    665     else { 
    666     # process any files marked for exporting 
    667     foreach my $file (keys %{$manifest_lookup->{'index'}}) { 
    668         &plugin::read ($pluginfo, $importdir, $file, {}, {}, $processor, $maxdocs, 0, $gli); 
    669     } 
    670  
    671     my @deleted_files = keys %{$manifest_lookup->{'delete'}}; 
    672  
    673     &inexport::mark_docs_for_deletion($export_info,{},\@deleted_files,$exportdir); 
    674  
    675     } 
    676  
    677     if ($saveas eq "FedoraMETS") { 
    678     # create collection "doc obj" for Fedora that contains 
    679     # collection-level metadata 
    680      
    681     my $doc_obj = new doc($configfilename,"nonindexed_doc","none"); 
    682     $doc_obj->set_OID("collection"); 
    683      
    684     my $col_name = undef; 
    685     my $col_meta = $collectcfg->{'collectionmeta'}; 
    686      
    687     if (defined $col_meta) { 
    688          
    689         store_collectionmeta($col_meta,"collectionname",$doc_obj); # in GS3 this is a collection's name 
    690         store_collectionmeta($col_meta,"collectionextra",$doc_obj); # in GS3 this is a collection's description 
    691          
    692     } 
    693     $processor->process($doc_obj); 
    694     } 
    695          
    696     &plugin::end($pluginfo, $processor); 
    697  
    698     &plugin::deinit($pluginfo, $processor); 
    699          
    700     # Store the value of OIDCount (used in doc.pm) so it can be 
    701     # restored correctly to this value on an incremental build 
    702     &inexport::store_doc_oid_count($exportdir); 
    703  
    704     # write out the export information file 
    705     #$processor->close_file_output() if $groupsize > 1; 
    706     $processor->close_group_output() if $processor->is_group(); 
    707  
    708 #    if (($saveas =~ m/^.*METS$/) || ($saveas eq "MARCXML")) { 
    709 #   # Not all export types need this, 
    710  
    711 ##  $export_info->save_info($expinfo_doc_filename); 
    712 #    } 
    713  
    714  
    715     # for backwards compatability with archvies.inf file 
    716     if ($expinfo_doc_filename =~ m/(contents)|(\.inf)$/) { 
    717     $export_info->save_info($expinfo_doc_filename); 
    718     } 
    719     else { 
    720     $export_info->save_revinfo_db($expinfo_src_filename); 
    721     } 
    722  
    723          
    724     # write out export stats 
    725     my $close_stats = 0; 
    726     if ($statsfile !~ /^(STDERR|STDOUT)$/i) { 
    727     if (open (STATS, ">$statsfile")) { 
    728         $statsfile = 'import::STATS'; 
    729         $close_stats = 1; 
    730     } else { 
    731         &gsprintf($out, "{import.cannot_open_stats_file}", $statsfile); 
    732         &gsprintf($out, "{import.stats_backup}\n"); 
    733         $statsfile = 'STDERR'; 
    734     } 
    735     } 
    736  
    737     &gsprintf($out, "\n"); 
    738     &gsprintf($out, "*********************************************\n"); 
    739     &gsprintf($out, "{export.complete}\n"); 
    740     &gsprintf($out, "*********************************************\n"); 
    741     
    742     &plugin::write_stats($pluginfo, $statsfile, $faillogname, $gli); 
    743     if ($close_stats) { 
    744     close STATS; 
    745     } 
    746      
    747     close OUT if $close_out; 
    748     close FAILLOG; 
    749 } 
    750  
    751  
    752 sub store_collectionmeta 
    753 { 
    754     my ($collectionmeta,$field,$doc_obj) = @_; 
    755  
    756     my $section = $doc_obj->get_top_section(); 
    757  
    758     my $field_hash = $collectionmeta->{$field}; 
    759  
    760     foreach my $k (keys %$field_hash)  
    761     { 
    762     my $val = $field_hash->{$k}; 
    763  
    764     ### print STDERR "*** $k = $field_hash->{$k}\n"; 
    765  
    766     my $md_label = "ex.$field"; 
    767  
    768  
    769     if ($k =~ m/^\[l=(.*?)\]$/) 
    770     { 
    771  
    772         my $md_suffix = $1; 
    773         $md_label .= "^$md_suffix"; 
    774     } 
    775  
    776  
    777     $doc_obj->add_utf8_metadata($section,$md_label, $val); 
    778      
    779     # see collConfigxml.pm: GS2's "collectionextra" is called "description" in GS3, 
    780     # while "collectionname" in GS2 is called "name" in GS3. 
    781     # Variable $nameMap variable in collConfigxml.pm maps between GS2 and GS3 
    782     if (($md_label eq "ex.collectionname^en") || ($md_label eq "ex.collectionname")) 
    783     { 
    784         $doc_obj->add_utf8_metadata($section,"dc.Title", $val); 
    785     } 
    786  
    787     } 
    788 } 
    789  
    790  
    791  
    792  
     290 
  • main/trunk/greenstone2/bin/script/import.pl

    r22413 r22421  
    241241    'range' => "0,", 
    242242    # parsearg left "" as default 
    243     'deft' => "2", 
     243    # 'deft' => "2", 
    244244    'reqd' => "no", 
    245245    'modegli' => "3" }, 
     
    263263sub main  
    264264{ 
    265     my $inexport = new inexport(\@ARGV,$options); 
     265    my $inexport = new inexport("import",\@ARGV,$options); 
    266266     
    267267    my $collection = $inexport->get_collection(); 
    268     my $collect_cfg = $inexport->read_collection_cfg($collection,$options);     
    269     $inexport->set_collection_options("import",$collect_cfg); 
     268    my ($config_filename,$collect_cfg) = $inexport->read_collection_cfg($collection,$options);     
     269    $inexport->set_collection_options($collect_cfg); 
    270270     
    271     my $pluginfo = $inexport->process_files("import",$collect_cfg); 
    272  
    273     $inexport->generate_statistics("import",$pluginfo); 
     271    my $pluginfo = $inexport->process_files($config_filename,$collect_cfg); 
     272 
     273    $inexport->generate_statistics($pluginfo); 
    274274} 
    275275 
  • main/trunk/greenstone2/perllib/inexport.pm

    r22413 r22421  
    5151{ 
    5252    my $class = shift (@_); 
    53     my ($argv,$options) = @_; 
    54  
    55     my $self = { 'xml' => 0 }; 
     53    my ($mode,$argv,$options,$opt_listall_options) = @_; 
     54 
     55    my $self = { 'xml' => 0, 'mode' => $mode }; 
    5656 
    5757    # general options available to all plugins 
     
    7272    } 
    7373 
     74    if ($self->{'listall'}) { 
     75    if ($self->{'xml'}) { 
     76        &PrintUsage::print_xml_usage($opt_listall_options); 
     77    } 
     78    else 
     79    { 
     80        &PrintUsage::print_txt_usage($opt_listall_options,"{export.params}"); 
     81    } 
     82    die "\n"; 
     83    } 
     84 
     85 
    7486    if ($self->{'xml'}) { 
    7587        &PrintUsage::print_xml_usage($options); 
     
    151163 
    152164    # Read in the collection configuration file. 
    153     my ($configfilename, $gs_mode) = &colcfg::get_collect_cfg_name($out); 
    154     my $collectcfg = &colcfg::read_collection_cfg ($configfilename, $gs_mode); 
    155  
    156     return $collectcfg; 
     165    my ($config_filename, $gs_mode) = &colcfg::get_collect_cfg_name($out); 
     166    my $collectcfg = &colcfg::read_collection_cfg ($config_filename, $gs_mode); 
     167 
     168    return ($config_filename,$collectcfg); 
    157169} 
    158170 
     
    160172{ 
    161173    my $self = shift @_; 
    162     my ($inexport_mode,$collectcfg) = @_; 
     174    my ($collectcfg) = @_; 
     175 
     176    my $inexport_mode = $self->{'mode'}; 
    163177 
    164178    my $verbosity  = $self->{'verbosity'}; 
    165     print STDERR "**** verbosity = $verbosity\n\n\n"; 
    166  
    167179    my $debug      = $self->{'debug'}; 
    168180    my $importdir  = $self->{'importdir'}; 
     
    218230    } 
    219231    } 
     232    $self->{'verbosity'} = $verbosity; 
     233 
    220234    if (defined $collectcfg->{'manifest'} && $self->{'manifest'} eq "") { 
    221235    $self->{'manifest'} = $collectcfg->{'manifest'}; 
     
    235249    } 
    236250    } 
    237     if ($self->{'groupsize'} == 1) { 
     251 
     252    if ((defined $self->{'groupsize'}) && ($self->{'groupsize'} == 1)) { 
    238253    if (defined $collectcfg->{'groupsize'} && $collectcfg->{'groupsize'} =~ /\d+/) { 
    239254        $self->{'groupsize'} = $collectcfg->{'groupsize'}; 
     
    287302        
    288303    # check keepold and removeold 
     304    my $checkdir = ($inexport_mode eq "import") ? "archives" : "export"; 
     305 
    289306    my ($removeold, $keepold, $incremental, $incremental_mode)  
    290307    = &scriptutil::check_removeold_and_keepold($self->{'removeold'}, $self->{'keepold'},  
    291                            $self->{'incremental'}, "archives",  
     308                           $self->{'incremental'}, $checkdir,  
    292309                           $collectcfg); 
    293310 
     
    301318{ 
    302319    my $self = shift @_; 
    303     my ($inexport_mode,$collectcfg) = @_; 
     320    my ($config_filename,$collectcfg) = @_; 
     321 
     322    my $inexport_mode = $self->{'mode'}; 
    304323 
    305324    my $verbosity   = $self->{'verbosity'}; 
     
    330349    my $removesuffix = $self->{'removesuffix'}; 
    331350 
    332     my $gli         = $self->{'gli'}; 
    333  
    334     print STDERR "<Import>\n" if $gli; 
     351    my $gli          = $self->{'gli'}; 
     352 
     353    # related to export 
     354    my $xsltfile         = $self->{'xsltfile'}; 
     355    my $group_marc       = $self->{'group_marc'}; 
     356    my $mapping_file     = $self->{'mapping_file'}; 
     357    my $xslt_mets        = $self->{'xslt_mets'}; 
     358    my $xslt_txt         = $self->{'xslt_txt'}; 
     359    my $fedora_namespace = $self->{'fedora_namespace'}; 
     360 
     361    if ($inexport_mode eq "import") { 
     362    print STDERR "<Import>\n" if $gli; 
     363    } 
     364    else { 
     365    print STDERR "<export>\n" if $gli; 
     366    } 
    335367     
    336368    my $manifest_lookup = new manifest($collectcfg->{'infodbtype'},$archivedir); 
     
    358390    #some global options for the plugins 
    359391    my @global_opts = (); 
    360  
    361392 
    362393    my $pluginfo = &plugin::load_plugins ($plugins, $verbosity, $out, $faillog, \@global_opts, $incremental_mode); 
     
    403434 
    404435    ####Use Plugout#### 
    405     my ($plugout);  
    406     if (defined $collectcfg->{'plugout'}) { 
    407     # If a plugout was specified in the collect.cfg file, assume it is sensible 
    408     # We can't check the name because it could be anything, if it is a custom plugout 
    409     $plugout = $collectcfg->{'plugout'}; 
    410     } 
    411     else{ 
    412     if ($saveas !~ /^(GreenstoneXML|GreenstoneMETS)$/) { 
    413         push @$plugout,"GreenstoneXMLPlugout"; 
     436    my $plugout;  
     437 
     438    if ($inexport_mode eq "import") { 
     439    if (defined $collectcfg->{'plugout'}) { 
     440        # If a plugout was specified in the collect.cfg file, assume it is sensible 
     441        # We can't check the name because it could be anything, if it is a custom plugout 
     442        $plugout = $collectcfg->{'plugout'}; 
    414443    } 
    415444    else{ 
    416         push @$plugout,$saveas."Plugout"; 
    417     } 
    418     } 
    419  
    420     push @$plugout,("-output_info",$archive_info) if (defined $archive_info);  
    421     push @$plugout,("-verbosity",$verbosity)      if (defined $verbosity); 
    422     push @$plugout,("-gzip_output")               if ($gzip); 
    423     push @$plugout,("-group_size",$groupsize)     if (defined $groupsize); 
    424     push @$plugout,("-output_handle",$out)        if (defined); 
    425     push @$plugout,("-debug")                     if ($debug); 
    426      
     445        if ($saveas !~ /^(GreenstoneXML|GreenstoneMETS)$/) { 
     446        push @$plugout,"GreenstoneXMLPlugout"; 
     447        } 
     448        else{ 
     449        push @$plugout,$saveas."Plugout"; 
     450        } 
     451    } 
     452    } 
     453    else { 
     454    if (defined $collectcfg->{'plugout'} && $collectcfg->{'plugout'} =~ /^(.*METS|DSpace|MARCXML)Plugout/) { 
     455        $plugout = $collectcfg->{'plugout'}; 
     456    } 
     457    else{ 
     458        if ($saveas !~ /^(GreenstoneMETS|FedoraMETS|DSpace|MARCXML)$/) { 
     459        push @$plugout,"GreenstoneMETSPlugout"; 
     460        } 
     461        else{ 
     462        push @$plugout,$saveas."Plugout"; 
     463        } 
     464    } 
     465    } 
     466     
     467    my $plugout_name = $plugout->[0]; 
     468 
     469    push @$plugout,("-output_info",$archive_info)  if (defined $archive_info);  
     470    push @$plugout,("-verbosity",$verbosity)       if (defined $verbosity); 
     471    push @$plugout,("-debug")                      if ($debug); 
     472    push @$plugout,("-group_size",$groupsize)      if (defined $groupsize); 
     473    push @$plugout,("-gzip_output")                if ($gzip); 
     474    push @$plugout,("-output_handle",$out)         if (defined $out); 
     475 
     476    push @$plugout,("-xslt_file",$xsltfile)        if (defined $xsltfile && $xsltfile ne ""); 
     477 
     478    if ($plugout_name =~ m/^MARCXMLPlugout$/) { 
     479    push @$plugout,("-group")                      if ($group_marc); 
     480    push @$plugout,("-mapping_file",$mapping_file) if (defined $mapping_file && $mapping_file ne ""); 
     481    } 
     482    if ($plugout_name =~ m/^.*METSPlugout$/) { 
     483    push @$plugout,("-xslt_mets",$xslt_mets)       if (defined $xslt_mets && $xslt_mets ne ""); 
     484    push @$plugout,("-xslt_txt",$xslt_txt)         if (defined $xslt_txt && $xslt_txt ne ""); 
     485    } 
     486 
     487    if ($plugout_name eq "FedoraMETSPlugout") { 
     488    push @$plugout,("-fedora_namespace",$fedora_namespace) if (defined $fedora_namespace && $fedora_namespace ne ""); 
     489    } 
     490 
     491 
    427492    my $processor = &plugout::load_plugout($plugout);                         
    428493    $processor->setoutputdir ($archivedir); 
     
    443508    &plugin::file_block_read($pluginfo, $importdir, "", $block_hash, $metadata, $gli); 
    444509 
    445  
    446510    if ($incremental || $incremental_mode eq "onlyadd") { 
    447511 
    448512        prime_doc_oid_count($archivedir); 
    449  
    450513 
    451514        # Can now work out which files were new, already existed, and have 
     
    532595    &plugin::remove_some($pluginfo, $collectcfg->{'infodbtype'}, $archivedir, \@full_deleted_files); 
    533596    mark_docs_for_deletion($archive_info,{}, 
    534                       \@full_deleted_files, 
    535                       $archivedir, $verbosity, "delete"); 
     597                   \@full_deleted_files, 
     598                   $archivedir, $verbosity, "delete"); 
    536599 
    537600 
     
    574637    } 
    575638 
     639    if ($saveas eq "FedoraMETS") { 
     640    # create collection "doc obj" for Fedora that contains 
     641    # collection-level metadata 
     642     
     643    my $doc_obj = new doc($config_filename,"nonindexed_doc","none"); 
     644    $doc_obj->set_OID("collection"); 
     645     
     646    my $col_name = undef; 
     647    my $col_meta = $collectcfg->{'collectionmeta'}; 
     648     
     649    if (defined $col_meta) {         
     650        store_collectionmeta($col_meta,"collectionname",$doc_obj); # in GS3 this is a collection's name 
     651        store_collectionmeta($col_meta,"collectionextra",$doc_obj); # in GS3 this is a collection's description      
     652    } 
     653    $processor->process($doc_obj); 
     654    } 
     655 
    576656    &plugin::end($pluginfo, $processor); 
    577657 
     
    586666    $processor->close_group_output() if $processor->is_group(); 
    587667 
    588 # The following 'if' statement is in the export.pl version of the script, 
    589 # The reason for the 'if' statement is now given in export.pl 
    590 # Unclear at this point if the same should be done here 
    591 ##    if (($saveas =~ m/^.*METS$/) || ($saveas eq "MARC")) { 
    592     # Not all export types need this (e.g. DSpace) 
    593  
    594     # should we still do this in debug mode?? 
    595  
    596669    # for backwards compatibility with archives.inf file
    597669    if ($arcinfo_doc_filename =~ m/(contents)|(\.inf)$/) { 
     
    602674    } 
    603675 
    604  
    605 ##    } 
    606  
    607676    return $pluginfo; 
    608677} 
     
    612681{ 
    613682    my $self = shift @_; 
    614     my ($inexport_mode,$pluginfo) = @_; 
     683    my ($pluginfo) = @_; 
     684 
     685    my $inexport_mode = $self->{'mode'}; 
    615686 
    616687    my $statsfile = $self->{'statsfile'}; 
     
    634705    &gsprintf($out, "\n"); 
    635706    &gsprintf($out, "*********************************************\n"); 
    636     &gsprintf($out, "{import.complete}\n"); 
     707    &gsprintf($out, "{$inexport_mode.complete}\n"); 
    637708    &gsprintf($out, "*********************************************\n"); 
    638709