Changeset 17142


Ignore:
Timestamp:
2008-09-04T14:27:27+12:00 (16 years ago)
Author:
kjdon
Message:

Tidied up the import and export scripts a little. Reordered the code to make them more similar, in preparation for merging the scripts later. import now only supports GA and GreenstoneMETS; export now only supports GreenstoneMETS, FedoraMETS, DSpace and MARCXML. export.pl now only exports one collection at a time. I have also removed some options that didn't make sense: sortmeta and groupsize.

Location:
gsdl/trunk/bin/script
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/bin/script/export.pl

    r16425 r17142  
    88# University of Waikato, New Zealand.
    99#
    10 # Copyright (C) 1999 New Zealand Digital Library Project
     10# Copyright (C) 2004 New Zealand Digital Library Project
    1111#
    1212# This program is free software; you can redistribute it and/or modify
     
    2828
    2929# This program will export a particular collection into a specific Format (e.g. METS or DSpace)
    30 # Author: Chi-Yu Huang Date: 08-10-2004
    3130
    3231package export;
     
    3938    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/plugins");
    4039    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/plugouts");
    41     unshift (@INC, "$ENV{'GSDLHOME'}/perllib/classify");
    4240
    4341    if (defined $ENV{'GSDLEXTS'}) {
     
    4846        unshift (@INC, "$ext_prefix/perllib");
    4947        unshift (@INC, "$ext_prefix/perllib/cpan");
     48        unshift (@INC, "$ext_prefix/perllib/plugins");
     49        unshift (@INC, "$ext_prefix/perllib/plugouts");
    5050    }
    5151    }
     
    7979        'desc' => "{import.OIDtype.dirname}" } ];
    8080
    81 #** define to use the METS format or DSpace format
     81# what format to export as
    8282my $saveas_list =
    83     [ { 'name' => "DSpace",
    84         'desc' => "{export.saveas.DSpace}" },
    85       { 'name' => "GreenstoneMETS",
     83    [ { 'name' => "GreenstoneMETS",
    8684        'desc' => "{export.saveas.GreenstoneMETS}"},
    8785      { 'name' => "FedoraMETS",
    8886        'desc' => "{export.saveas.FedoraMETS}"},
    89       { 'name' => "GA",
    90         'desc' => "{export.saveas.GA}"},
    9187      { 'name' => "MARCXML",
    92         'desc' => "{export.saveas.MARCXML}"}
     88        'desc' => "{export.saveas.MARCXML}"},
     89      { 'name' => "DSpace",
     90        'desc' => "{export.saveas.DSpace}" }
    9391     ];
    9492
     
    107105    'type' => "enum",
    108106    'list' => $saveas_list,
    109     'deft' => "METS",
     107    'deft' => "GreenstoneMETS",
    110108    'reqd' => "no",
    111109    'modegli' => "3" };
     
    125123    'reqd' => "no",
    126124        'hiddengli' => "yes" },
    127       { 'name' => "site",
    128     'desc' => "{import.site}",
    129     'type' => "string",
    130     'deft' => "",
    131     'reqd' => "no",
    132         'hiddengli' => "yes" },
    133125      { 'name' => "collectdir",
    134126    'desc' => "{export.collectdir}",
     
    139131    'reqd' => "no",
    140132    'hiddengli' => "yes" },
    141       { 'name' => "manifest",
     133      { 'name' => "site",
     134    'desc' => "{import.site}",
     135    'type' => "string",
     136    'deft' => "",
     137    'reqd' => "no",
     138        'hiddengli' => "yes" },
     139     { 'name' => "manifest",
    142140    'desc' => "{import.manifest}",
    143141    'type' => "string",
     
    160158    'reqd' => "no",
    161159        'modegli' => "4" },
    162       { 'name' => "incremental",
    163     'desc' => "{import.incremental}",
    164     'type' => "flag",
    165     'hiddengli' => "yes" },
     160      # does this make sense?
     161#      { 'name' => "incremental",
     162#   'desc' => "{import.incremental}",
     163#   'type' => "flag",
     164#   'hiddengli' => "yes" },
    166165      { 'name' => "keepold",
    167166    'desc' => "{export.keepold}",
     
    250249    'modegli' => "4" },
    251250      { 'name' => "gli",
    252     'desc' => "",
     251    'desc' => "{scripts.gli}",
    253252    'type' => "flag",
    254253    'reqd' => "no",
     
    269268                'args' => [ $saveas_argument ] };
    270269
    271 #sub gsprintf
    272 #{
    273 #    return &gsprintf::gsprintf(@_);
    274 #}
     270
     271&main();
     272
     273sub main {
     274    # params
     275    my ($language, $verbosity, $debug,
     276    $collectdir, $importdir, $exportdir, $site, $manifest,
     277    $incremental, $keepold, $removeold,
     278    $saveas,
     279    $OIDtype, $OIDmetadata,
     280    $maxdocs, $statsfile,
     281    $out, $faillog, $gli, $listall,
     282    # plugout specific ones
     283    $mapping_file, $xsltfile,
     284    $xslt_mets, $xslt_txt, $fedora_namespace, $group_marc);
     285
     286    my $xml = 0;
     287   
     288    # other vars
     289    my ($configfilename, $collection, $collectcfg,
     290    $export_info_filename, $export_info,
     291    $gs_mode,
     292    $processor, $pluginfo);
     293
     294    my $service = "export";
     295
     296    my $hashParsingResult = {};
     297    # general options available to all plugins
     298    my $intArgLeftinAfterParsing = parse2::parse(\@ARGV,$arguments,$hashParsingResult,"allow_extra_options");
     299   
     300    # If parse returns -1 then something has gone wrong
     301    if ($intArgLeftinAfterParsing == -1)
     302    {
     303    &PrintUsage::print_txt_usage($options, "{export.params}");
     304    die "\n";
     305    }
     306
     307    foreach my $strVariable (keys %$hashParsingResult)
     308    {
     309    eval "\$$strVariable = \$hashParsingResult->{\"\$strVariable\"}";
     310    }
     311
     312   
     313    # these are options used by other things - we just set default values
     314    # undef means will be set from config file if there
     315    my $gzip = undef;
     316
     317    # If $language has been specified, load the appropriate resource bundle
     318    # (Otherwise, the default resource bundle will be loaded automatically)
     319    if ($language && $language =~ /\S/) {
     320    &gsprintf::load_language_specific_resource_bundle($language);
     321    }
     322
     323    if ($listall) {
     324    if ($xml) {
     325        &PrintUsage::print_xml_usage($listall_options);
     326    }
     327    else
     328    {
     329        &PrintUsage::print_txt_usage($listall_options,"{export.params}");
     330    }
     331    die "\n";
     332    }
     333   
     334    if ($xml) {
     335        &PrintUsage::print_xml_usage($options);
     336    die "\n";
     337    }
     338
     339    if ($gli) { # the gli wants strings to be in UTF-8
     340    &gsprintf::output_strings_in_UTF8;
     341    }
     342
     343    # now check that we had exactly one leftover arg, which should be
     344    # the collection name. We don't want to do this earlier, cos
     345    # -xml arg doesn't need a collection name
     346    # Or if the user specified -h, then we output the usage also
     347    if ($intArgLeftinAfterParsing != 1 || (@ARGV && $ARGV[0] =~ /^\-+h/))
     348    {
     349    &PrintUsage::print_txt_usage($options, "{export.params}");
     350    die "\n";
     351    }
     352
     353    my $close_out = 0;
     354    if ($out !~ /^(STDERR|STDOUT)$/i) {
     355    open (OUT, ">$out") ||
     356        (&gsprintf(STDERR, "{common.cannot_open_output_file}\n", $out) && die);
     357    $out = 'export::OUT';
     358    $close_out = 1;
     359    }
     360    $out->autoflush(1);
     361
     362    # get and check the collection name
     363    if (($collection = &colcfg::use_collection($site, @ARGV, $collectdir)) eq "") {
     364    &PrintUsage::print_txt_usage($options, "{export.params}");
     365    die "\n";
     366    }
     367    # add collection's perllib dir  into include path in
     368    # case we have collection specific modules
     369    unshift (@INC, "$ENV{'GSDLCOLLECTDIR'}/perllib");
     370   
     371    # check that we can open the faillog
     372    if ($faillog eq "") {
     373    $faillog = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "etc", "fail.log");
     374    }
     375    open (FAILLOG, ">$faillog") ||
     376    (&gsprintf(STDERR, "{export.cannot_open_fail_log}\n", $faillog) && die);
     377    my $faillogname = $faillog;
     378    $faillog = 'export::FAILLOG';
     379    $faillog->autoflush(1);
     380       
     381    # Read in the collection configuration file.
     382    ($configfilename, $gs_mode) = &colcfg::get_collect_cfg_name($out);
     383       
     384   
     385    # Read in the collection configuration file.
     386    ($configfilename, $gs_mode) = &colcfg::get_collect_cfg_name($out);
     387   
     388    if ($gs_mode eq "gs2") {
     389    $collectcfg = &colcfg::read_collect_cfg ($configfilename);
     390    } elsif ($gs_mode eq "gs3") {
     391    $collectcfg = &colcfg::read_collection_cfg_xml ($configfilename);
     392    }
     393
     394    if (defined $collectcfg->{'importdir'} && $importdir eq "") {
     395    $importdir = $collectcfg->{'importdir'};
     396    }
     397    if (defined $collectcfg->{'exportdir'} && $exportdir eq "") {
     398    $exportdir = $collectcfg->{'exportdir'};
     399    }
     400
     401    # fill in the default import and export directories if none
     402    # were supplied, turn all \ into / and remove trailing /
     403    $importdir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "import") if $importdir eq "";
     404    $importdir =~ s/[\\\/]+/\//g;
     405    $importdir =~ s/\/$//;
     406    if (!-e $importdir) {
     407    &gsprintf($out, "{import.no_import_dir}\n\n", $importdir);
     408    die "\n";
     409    }
     410
     411    $exportdir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "export") if $exportdir eq "";
     412    $exportdir =~ s/[\\\/]+/\//g;
     413    $exportdir =~ s/\/$//;
     414   
     415    my $plugins = [];
     416    if (defined $collectcfg->{'plugin'}) {
     417    $plugins = $collectcfg->{'plugin'};
     418    }
     419    # some global options for the plugins           
     420    my @global_opts = ();
     421
     422    if ($verbosity !~ /\d+/) {
     423    if (defined $collectcfg->{'verbosity'} && $collectcfg->{'verbosity'} =~ /\d+/) {
     424        $verbosity = $collectcfg->{'verbosity'};
     425    } else {
     426        $verbosity = 2; # the default
     427    }
     428    }
     429   
     430    if (defined $collectcfg->{'gzip'} && !$gzip) {
     431    if ($collectcfg->{'gzip'} =~ /^true$/i) {
     432        $gzip = 1;
     433    }
     434    }
     435    if ($maxdocs !~ /\-?\d+/) {
     436    if (defined $collectcfg->{'maxdocs'} && $collectcfg->{'maxdocs'} =~ /\-?\d+/) {
     437        $maxdocs = $collectcfg->{'maxdocs'};
     438    } else {
     439        $maxdocs = -1; # the default
     440    }
     441    }
     442   
     443    if (!defined $OIDtype || ($OIDtype !~ /^(hash|incremental|assigned|dirname)$/)) {
     444    if (defined $collectcfg->{'OIDtype'} && $collectcfg->{'OIDtype'} =~ /^(hash|incremental|assigned|dirname)$/) {
     445        $OIDtype = $collectcfg->{'OIDtype'};
     446    } else {
     447        $OIDtype = "hash"; # the default
     448    }
     449    }
     450
     451    if (defined $collectcfg->{'debug'} && $collectcfg->{'debug'} =~ /^true$/i) {
     452    $debug = 1;
     453    }
     454    if (defined $collectcfg->{'gli'} && $collectcfg->{'gli'} =~ /^true$/i) {
     455    $gli = 1;
     456    }
     457    $gli = 0 unless defined $gli;
     458
     459    # check keepold and removeold
     460    ($removeold, $keepold, $incremental) = &scriptutil::check_removeold_and_keepold($removeold, $keepold, $incremental, "export", $collectcfg);
     461
     462    print STDERR "<export>\n" if $gli;
     463   
     464    my $manifest_lookup = new manifest();
     465    if ($manifest ne "") { 
     466    my $manifest_filename = $manifest;
     467
     468    if ($manifest_filename !~ m/^[\\\/]/) {
     469        $manifest_filename = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, $manifest_filename);
     470    }
     471
     472    $manifest =~ s/[\\\/]+/\//g;
     473    $manifest =~ s/\/$//;
     474
     475    $manifest_lookup->parse($manifest_filename);
     476    }
     477   
     478    # load all the plugins
     479    $pluginfo = &plugin::load_plugins ($plugins, $verbosity, $out, $faillog, \@global_opts);
     480       
     481    if (scalar(@$pluginfo) == 0) {
     482    &gsprintf($out, "{import.no_plugins_loaded}\n");
     483    die "\n";
     484    }
     485   
     486    # remove the old contents of the export directory if needed
     487    if ($removeold && -e $exportdir) {
     488    &gsprintf($out, "{export.removing_export}\n");
     489    &util::rm_r ($exportdir);
     490    }
     491   
     492    # create the export dir if needed
     493    &util::mk_all_dir($exportdir);
     494   
     495    # read the export information file
     496    # If saveas=DSpace, a "contents" file will be created, otherwise "export.inf"
     497   
     498    # the plugouts should be doing this!!
     499    if ($saveas eq "DSpace"){
     500    $export_info_filename = &util::filename_cat ($exportdir, "contents");
     501    } elsif ($saveas =~ m/^.*METS$/ || $saveas eq "MARC" ) {
     502    $export_info_filename = &util::filename_cat ($exportdir, "export.inf");
     503    }
     504       
     505    $export_info = new arcinfo();
     506    $export_info -> load_info ($export_info_filename); 
     507       
     508    my ($plugout);
     509    if (defined $collectcfg->{'plugout'} && $collectcfg->{'plugout'} =~ /^(.*METS|DSpace|MARCXML)Plugout/) {
     510    $plugout = $collectcfg->{'plugout'};
     511    }
     512    else{
     513    if ($saveas !~ /^(.*METS|DSpace|MARCXML)$/) {
     514        push @$plugout,"GreenstoneMETSPlugout";
     515    }
     516    else{
     517        push @$plugout,$saveas."Plugout";
     518    }
     519    }
     520   
     521    my $plugout_name = $plugout->[0];
     522       
     523    push @$plugout,("-output_info",$export_info) if (defined $export_info);
     524    push @$plugout,("-verbosity",$verbosity) if (defined $verbosity);
     525    push @$plugout,("-debug") if ($debug);
     526    push @$plugout,("-gzip_output",$gzip) if (defined $gzip);
     527    push @$plugout,("-output_handle",$out) if (defined $out);
     528    push @$plugout,("-xslt_file",$xsltfile) if (defined $xsltfile);
     529    push @$plugout,("-group") if ($group_marc && $plugout_name =~ m/^MARCXMLPlugout$/);
     530    push @$plugout,("-mapping_file",$mapping_file) if (defined $mapping_file && $plugout_name =~ m/^MARCXMLPlugout$/);
     531    push @$plugout,("-xslt_mets",$xslt_mets) if (defined $xslt_mets && $plugout_name =~ m/^.*METSPlugout$/);
     532    push @$plugout,("-xslt_txt",$xslt_txt) if (defined $xslt_txt && $plugout_name =~ m/^.*METSPlugout$/);
     533    push @$plugout,("-fedora_namespace",$fedora_namespace) if (defined $fedora_namespace && $plugout_name eq "FedoraMETSPlugout");
     534   
     535    $processor = &plugout::load_plugout($plugout);   
     536    $processor->setoutputdir ($exportdir);
     537       
     538    $processor->set_OIDtype ($OIDtype, $OIDmetadata);
     539       
     540    &plugin::begin($pluginfo, $importdir, $processor, $maxdocs, $gli);
     541       
     542    if ($manifest eq "") {
     543    # process the import directory
     544    my $block_hash = {};
     545    my $metadata = {};
     546    # global blocking pass may set up some metadata
     547    &plugin::file_block_read($pluginfo, $importdir, "", $block_hash, $metadata, $gli);
     548    &plugin::read ($pluginfo, $importdir, "", $block_hash, $metadata, $processor, $maxdocs, 0, $gli);
     549    }
     550    else {
     551    # process any files marked for exporting
     552    foreach my $file (keys %{$manifest_lookup->{'export'}}) {
     553        &plugin::read ($pluginfo, $importdir, $file, {}, {}, $processor, $maxdocs, 0, $gli);
     554    }
     555    }
     556
     557    if ($saveas eq "FedoraMETS") {
     558    # create collection "doc obj" for Fedora that contains
     559    # collection-level metadata
     560   
     561    my $doc_obj = new doc($configfilename,"nonindexed_doc");
     562    $doc_obj->set_OID("collection");
     563   
     564    my $col_name = undef;
     565    my $col_meta = $collectcfg->{'collectionmeta'};
     566   
     567    if (defined $col_meta) {
     568       
     569        store_collectionmeta($col_meta,"collectionname",$doc_obj); # in GS3 this is a collection's name
     570        store_collectionmeta($col_meta,"collectionextra",$doc_obj); # in GS3 this is a collection's description
     571       
     572    }
     573    $processor->process($doc_obj);
     574    }
     575       
     576    &plugin::end($pluginfo, $processor);
     577
     578    &plugin::deinit($pluginfo, $processor);
     579       
     580    # write out the export information file
     581    #$processor->close_file_output() if $groupsize > 1;
     582    $processor->close_group_output() if $processor->is_group();
     583    # why do we need this??
     584    if ($saveas =~ m/^.*METS$/) {
     585    $export_info->save_info($export_info_filename);
     586    }
     587       
     588    # write out export stats
     589    my $close_stats = 0;
     590    if ($statsfile !~ /^(STDERR|STDOUT)$/i) {
     591    if (open (STATS, ">$statsfile")) {
     592        $statsfile = 'import::STATS';
     593        $close_stats = 1;
     594    } else {
     595        &gsprintf($out, "{import.cannot_open_stats_file}", $statsfile);
     596        &gsprintf($out, "{import.stats_backup}\n");
     597        $statsfile = 'STDERR';
     598    }
     599    }
     600
     601    &gsprintf($out, "\n");
     602    &gsprintf($out, "*********************************************\n");
     603    &gsprintf($out, "{export.complete}\n");
     604    &gsprintf($out, "*********************************************\n");
     605   
     606    &plugin::write_stats($pluginfo, $statsfile, $faillogname, $gli);
     607    if ($close_stats) {
     608    close STATS;
     609    }
     610   
     611    close OUT if $close_out;
     612    close FAILLOG;
     613}
    275614
    276615
     
    314653
    315654
    316 &main();
    317 
    318 sub main {
    319     # params
    320     my ($language, $verbosity, $debug,
    321     $importdir, $manifest, $incremental, $keepold,
    322     $exportdir, $site, $listall,
    323     $removeold, $saveas,
    324     $OIDtype, $OIDmetadata,
    325     $out, $faillog, $collectdir, $gli,
    326     $gs_mode, $collectcfg,
    327     $maxdocs, $statsfile,
    328     $mapping_file,
    329     $xsltfile,
    330     $xslt_mets, $xslt_txt, $fedora_namespace, $group_marc);
    331 
    332     my $xml = 0;
    333    
    334     # other vars
    335     my ($configfilename, $collection,
    336     $export_info_filename, $export_info,
    337     $processor, $pluginfo);
    338 
    339     my $service = "export";
    340 
    341     my $hashParsingResult = {};
    342     # general options available to all plugins
    343     my $intArgLeftinAfterParsing = parse2::parse(\@ARGV,$arguments,$hashParsingResult,"allow_extra_options");
    344    
    345     # If parse returns -1 then something has gone wrong
    346     if ($intArgLeftinAfterParsing == -1)
    347     {
    348     &PrintUsage::print_txt_usage($options, "{export.params}");
    349     die "\n";
    350     }
    351 
    352     foreach my $strVariable (keys %$hashParsingResult)
    353     {
    354     eval "\$$strVariable = \$hashParsingResult->{\"\$strVariable\"}";
    355     }
    356 
    357    
    358     # these are options used by other things - we just set default values
    359     # undef means will be set from config file if there
    360     my $gzip = undef;
    361     my $groupsize = 1;
    362     #my $OIDtype = undef;
    363     my $sortmeta = undef;
    364 
    365     my $explicit_exportdir = (defined $exportdir) ? 1 : 0;
    366 
    367     # save these command line settings. don't want config file settings in one
    368     # coll used for other colls
    369     # does this apply to other vars???
    370     my $global_removeold = $removeold;
    371     my $global_keepold = $keepold;
    372     # If $language has been specified, load the appropriate resource bundle
    373     # (Otherwise, the default resource bundle will be loaded automatically)
    374     if ($language) {
    375     &gsprintf::load_language_specific_resource_bundle($language);
    376     }
    377 
    378     if ($listall) {
    379     if ($xml) {
    380         &PrintUsage::print_xml_usage($listall_options);
    381     }
    382     else
    383     {
    384         &PrintUsage::print_txt_usage($listall_options,"{export.params}");
    385     }
    386     die "\n";
    387     }
    388     elsif ($xml) {
    389         &PrintUsage::print_xml_usage($options);
    390     die "\n";
    391     }
    392 
    393     # can have more than one collection name, 
    394     # if the first extra option is -h, then output the help
    395     if (scalar(@ARGV) == 0 || (@ARGV && $ARGV[0] =~ /^\-+h/)) {
    396     &PrintUsage::print_txt_usage($options, "{export.params}");
    397     die "\n";
    398     }
    399 
    400     if ($gli) { # the gli wants strings to be in UTF-8
    401     &gsprintf::output_strings_in_UTF8;
    402     }
    403     my $close_out = 0;
    404     if ($out !~ /^(STDERR|STDOUT)$/i) {
    405     open (OUT, ">$out") ||
    406         (&gsprintf(STDERR, "{common.cannot_open_output_file}\n", $out) && die);
    407     $out = 'export::OUT';
    408     $close_out = 1;
    409     }
    410     $out->autoflush(1);
    411 
    412     while (scalar(@ARGV)>0) {
    413     my $collect_name = shift @ARGV;
    414     $ENV{'GSDLCOLLECTION'} = $collect_name;
    415 
    416     eval {
    417         # get and check the collection name
    418         if (($collection = &colcfg::use_collection($site, $collect_name, $collectdir)) eq "") {
    419         &PrintUsage::print_txt_usage($options, "{export.params}");
    420         die "\n";
    421         }
    422         # add collection's perllib dir  into include path in
    423         # case we have collection specific modules
    424         unshift (@INC, "$ENV{'GSDLCOLLECTDIR'}/perllib");
    425        
    426         if ($faillog eq "") {
    427         $faillog = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "etc", "fail.log");
    428         }
    429         open (FAILLOG, ">$faillog") ||
    430         (&gsprintf(STDERR, "{export.cannot_open_fail_log}\n", $faillog) && die);
    431         my $faillogname = $faillog;
    432         $faillog = 'export::FAILLOG';
    433         $faillog->autoflush(1);
    434        
    435         # check sortmeta
    436         $sortmeta = undef unless defined $sortmeta && $sortmeta =~ /\S/;
    437         if (defined $sortmeta && $groupsize > 1) {
    438         &gsprintf($out, "{export.cannot_sort}\n\n");
    439         $sortmeta = undef;
    440         }
    441        
    442         # get the list of plugins for this collection and set any options that
    443         # were specified in the collect.cfg (all export.pl options except
    444         # -collectdir, -out and -faillog may be specified in the collect.cfg (these
    445         # options must be known before we read the collect.cfg))
    446         my $plugins = [];
    447         my @global_opts = ();
    448 
    449         # Read in the collection configuration file.
    450         ($configfilename, $gs_mode) = &colcfg::get_collect_cfg_name($out);
    451 
    452         if ($gs_mode eq "gs2") {
    453         $collectcfg = &colcfg::read_collect_cfg ($configfilename);
    454         } elsif ($gs_mode eq "gs3") {
    455         $collectcfg = &colcfg::read_collection_cfg_xml ($configfilename);
    456         }
    457 
    458         if (defined $collectcfg->{'importdir'} && $importdir eq "") {
    459         $importdir = $collectcfg->{'importdir'};
    460         }
    461         if (defined $collectcfg->{'exportdir'} && $exportdir eq "") {
    462         $exportdir = $collectcfg->{'exportdir'};
    463         }
    464 
    465         # fill in the default import and export directories if none
    466         # were supplied, turn all \ into / and remove trailing /
    467         $importdir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "import") if $importdir eq "";
    468         $importdir =~ s/[\\\/]+/\//g;
    469         $importdir =~ s/\/$//;
    470         if (!-e $importdir) {
    471         &gsprintf($out, "{import.no_import_dir}\n\n", $importdir);
    472         die "\n";
    473         }
    474 
    475         $exportdir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "export") if $exportdir eq "";
    476         $exportdir =~ s/[\\\/]+/\//g;
    477         $exportdir =~ s/\/$//;
    478 
    479         if (defined $collectcfg->{'plugin'}) {
    480         $plugins = $collectcfg->{'plugin'};
    481         }
    482        
    483         if ($verbosity !~ /\d+/) {
    484         if (defined $collectcfg->{'verbosity'} && $collectcfg->{'verbosity'} =~ /\d+/) {
    485             $verbosity = $collectcfg->{'verbosity'};
    486         } else {
    487             $verbosity = 2; # the default
    488         }
    489         }
    490 
    491         if (defined $collectcfg->{'gzip'} && !$gzip) {
    492         if ($collectcfg->{'gzip'} =~ /^true$/i) {
    493             $gzip = 1;
    494         }
    495         }
    496         if ($maxdocs !~ /\-?\d+/) {
    497         if (defined $collectcfg->{'maxdocs'} && $collectcfg->{'maxdocs'} =~ /\-?\d+/) {
    498             $maxdocs = $collectcfg->{'maxdocs'};
    499         } else {
    500             $maxdocs = -1; # the default
    501         }
    502         }
    503         if ($groupsize == 1) {
    504         if (defined $collectcfg->{'groupsize'} && $collectcfg->{'groupsize'} =~ /\d+/) {
    505             $groupsize = $collectcfg->{'groupsize'};
    506         }
    507         }
    508         if (!defined $OIDtype || ($OIDtype !~ /^(hash|incremental|assigned|dirname)$/)) {
    509         if (defined $collectcfg->{'OIDtype'} && $collectcfg->{'OIDtype'} =~ /^(hash|incremental)$/) {
    510             $OIDtype = $collectcfg->{'OIDtype'};
    511         } else {
    512             $OIDtype = "hash"; # the default
    513         }
    514         }
    515         if (defined $collectcfg->{'sortmeta'} && $sortmeta eq "") {
    516         $sortmeta = $collectcfg->{'sortmeta'};
    517         }
    518         if (defined $collectcfg->{'debug'} && $collectcfg->{'debug'} =~ /^true$/i) {
    519         $debug = 1;
    520         }
    521         if (defined $collectcfg->{'gli'} && $collectcfg->{'gli'} =~ /^true$/i) {
    522         $gli = 1;
    523         }
    524        
    525         # global plugin stuff
    526         if (defined $collectcfg->{'separate_cjk'}&& $collectcfg->{'separate_cjk'} =~ /^true$/i) {
    527         push @global_opts, "-separate_cjk";
    528         }
    529        
    530         ($removeold, $keepold) = &scriptutil::check_removeold_and_keepold($global_removeold, $global_keepold, 0, "export", $collectcfg);
    531 
    532         $gli = 0 unless defined $gli;
    533 
    534         print STDERR "<export>\n" if $gli;
    535        
    536         # fill in the default import and export directories if none
    537         # were supplied, turn all \ into / and remove trailing /
    538         $importdir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "import") if $importdir eq "";
    539         $importdir =~ s/[\\\/]+/\//g;
    540         $importdir =~ s/\/$//;
    541         $exportdir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "export") if $exportdir eq "";
    542         $exportdir =~ s/[\\\/]+/\//g;
    543         $exportdir =~ s/\/$//;
    544        
    545         # load all the plugins
    546         $pluginfo = &plugin::load_plugins ($plugins, $verbosity, $out, $faillog, \@global_opts);
    547        
    548         if (scalar(@$pluginfo) == 0) {
    549         &gsprintf($out, "{import.no_plugins_loaded}\n");
    550         die "\n";
    551         }
    552        
    553         # remove the old contents of the export directory if needed
    554         if ($removeold && -e $exportdir) {
    555         &gsprintf($out, "{export.removing_export}\n");
    556         &util::rm_r ($exportdir);
    557         }
    558        
    559         # read the export information file
    560        
    561         # Export to DSpace Archive format or METs format
    562         # If saveas=DSpace, a "contents" file will be created, otherwise "export.inf"
    563        
    564         # the plugouts should be doing this!!
    565         if ($saveas eq "DSpace"){
    566         $export_info_filename = &util::filename_cat ($exportdir, "contents");
    567         } elsif ($saveas =~ m/^.*METS$/ || $saveas eq "GA" || $saveas eq "MARC" ) {
    568         $export_info_filename = &util::filename_cat ($exportdir, "export.inf");
    569         }
    570        
    571         $export_info = new arcinfo();
    572         $export_info -> load_info ($export_info_filename); 
    573        
    574         my ($plugout);
    575         if (defined $collectcfg->{'plugout'}) {
    576         $plugout = $collectcfg->{'plugout'};
    577         }
    578         else{
    579         if ($saveas !~ /^(GA|.*METS|DSpace|MARCXML)$/) {
    580             push @$plugout,"GAPlugout";
    581         }
    582         else{
    583             push @$plugout,$saveas."Plugout";
    584         }
    585         }
    586 
    587         my $plugout_name = $plugout->[0];
    588        
    589         push @$plugout,("-output_info",$export_info) if (defined $export_info);
    590         push @$plugout,("-verbosity",$verbosity) if (defined $verbosity);
    591         push @$plugout,("-debug") if ($debug);
    592         push @$plugout,("-gzip_output",$gzip) if (defined $gzip);
    593         push @$plugout,("-group_size",$groupsize) if (defined $groupsize);
    594         push @$plugout,("-output_handle",$out) if (defined $out);
    595         push @$plugout,("-xslt_file",$xsltfile) if (defined $xsltfile);
    596         push @$plugout,("-group") if ($group_marc && $plugout_name =~ m/^MARCXMLPlugout$/);
    597         push @$plugout,("-mapping_file",$mapping_file) if (defined $mapping_file && $plugout_name =~ m/^MARCXMLPlugout$/);
    598         push @$plugout,("-xslt_mets",$xslt_mets) if (defined $xslt_mets && $plugout_name =~ m/^.*METSPlugout$/);
    599         push @$plugout,("-xslt_txt",$xslt_txt) if (defined $xslt_txt && $plugout_name =~ m/^.*METSPlugout$/);
    600         push @$plugout,("-fedora_namespace",$fedora_namespace) if (defined $fedora_namespace && $plugout_name eq "FedoraMETSPlugout");
    601 
    602         $processor = &plugout::load_plugout($plugout);   
    603        
    604         $processor->setoutputdir ($exportdir);
    605        
    606         $processor->set_sortmeta ($sortmeta) if defined $sortmeta;
    607         $processor->set_OIDtype ($OIDtype, $OIDmetadata);
    608        
    609         &plugin::begin($pluginfo, $importdir, $processor, $maxdocs);
    610        
    611         # process the import directory
    612         my $block_hash = {};
    613         my $metadata = {};
    614         # global blocking pass may set up some metadata
    615         &plugin::file_block_read($pluginfo, $importdir, "", $block_hash, $metadata, $gli);
    616         &plugin::read ($pluginfo, $importdir, "", $block_hash, $metadata, $processor, $maxdocs, 0, $gli);
    617 
    618 
    619         if ($saveas eq "FedoraMETS")
    620         {
    621         # create collection "doc obj" for Fedora that contains
    622         # collection-level metadata
    623 
    624         my $doc_obj = new doc($configfilename,"nonindexed_doc");
    625         $doc_obj->set_OID("collection");
    626        
    627         my $col_name = undef;
    628         my $col_meta = $collectcfg->{'collectionmeta'};
    629        
    630         if (defined $col_meta)
    631         {
    632             store_collectionmeta($col_meta,"collectionname",$doc_obj); # in GS3 this is a collection's name
    633             store_collectionmeta($col_meta,"collectionextra",$doc_obj); # in GS3 this is a collection's description
    634            
    635         }
    636         $processor->process($doc_obj);
    637         }
    638        
    639         &plugin::end($pluginfo, $processor);
    640 
    641         &plugin::deinit($pluginfo, $processor);
    642        
    643         # write out the export information file
    644         $processor->close_file_output() if $groupsize > 1;
    645         $processor->close_group_output() if $processor->is_group();
    646         if ($saveas =~ m/^.*METS$/) {
    647         $export_info->save_info($export_info_filename);
    648         }
    649        
    650         # write out export stats
    651         my $close_stats = 0;
    652         if ($statsfile !~ /^(STDERR|STDOUT)$/i) {
    653         if (open (STATS, ">$statsfile")) {
    654             $statsfile = 'import::STATS';
    655             $close_stats = 1;
    656         } else {
    657             &gsprintf($out, "{import.cannot_open_stats_file}", $statsfile);
    658             &gsprintf($out, "{import.stats_backup}\n");
    659             $statsfile = 'STDERR';
    660         }
    661         }
    662 
    663         &gsprintf($out, "\n");
    664         &gsprintf($out, "*********************************************\n");
    665 
    666         &plugin::write_stats($pluginfo, $statsfile, $faillogname, $gli);
    667         if ($close_stats) {
    668         close STATS;
    669         }
    670 
    671         &gsprintf($out, "*********************************************\n");
    672 
    673         close OUT if $close_out;
    674 
    675         close FAILLOG;
    676     };
    677 
    678     if ($@) {
    679         print STDERR $@;
    680     }
    681 
    682 ##  $ENV{'GSDLCOLLECTION'} = undef;
    683     $importdir = "";
    684     $removeold = 0 if ($explicit_exportdir);
    685 
    686     } # while processing ARGV
    687    
    688     &gsprintf($out, "\n");
    689     &gsprintf($out, "*********************************************\n");
    690     &gsprintf($out, "* {export.complete}\n");
    691     &gsprintf($out, "*********************************************\n");   
    692 
    693 }
     655
     656
  • gsdl/trunk/bin/script/import.pl

    r17038 r17142  
    253253    'modegli' => "4" },
    254254      { 'name' => "gli",
    255     'desc' => "",
     255    'desc' => "{scripts.gli}",
    256256    'type' => "flag",
    257257    'reqd' => "no",
     
    273273    # params
    274274    my ($language, $verbosity, $debug,
    275     $importdir, $site, $manifest, $incremental, $keepold,
    276     $removeold, $saveas,
     275    $collectdir, $importdir, $archivedir, $site, $manifest,
     276    $incremental, $keepold, $removeold,
     277    $saveas,
    277278    $OIDtype, $OIDmetadata,
    278279    $maxdocs, $statsfile,
    279     $out, $faillog, $collectdir, $gli,
    280 
    281     $archivedir,
     280    $out, $faillog, $gli,
    282281    $gzip, $groupsize,
    283     $sortmeta, $reversesort, $removeprefix, $removesuffix,
     282    $sortmeta, $reversesort, $removeprefix, $removesuffix
    284283    );
    285284
     
    287286
    288287    # other vars
    289     my ($configfilename, $collectcfg, $collection,
     288    my ($configfilename, $collection, $collectcfg,
    290289    $archive_info_filename, $archive_info,
    291290    $gs_mode,
     
    331330    if ($intArgLeftinAfterParsing != 1 || (@ARGV && $ARGV[0] =~ /^\-+h/))
    332331    {
    333     &PrintUsage::print_txt_usage($options, "{buildcol.params}");
     332    &PrintUsage::print_txt_usage($options, "{import.params}");
    334333    die "\n";
    335334    }
     
    432431    }
    433432
    434     if ($OIDtype !~ /^(hash|incremental|assigned|dirname)$/) {
     433    if (!defined $OIDtype || ($OIDtype !~ /^(hash|incremental|assigned|dirname)$/ )) {
    435434    if (defined $collectcfg->{'OIDtype'} && $collectcfg->{'OIDtype'} =~ /^(hash|incremental|assigned|dirname)$/) {
    436435        $OIDtype = $collectcfg->{'OIDtype'};
    437436    } else {
    438437        $OIDtype = "hash"; # the default
    439     }
    440     }
    441 
    442     my ($plugout);
    443     if (defined $collectcfg->{'plugout'}) {
    444     $plugout = $collectcfg->{'plugout'};
    445     }
    446     else{
    447     if ($saveas !~ /^(GA|.*METS|DSpace|MARCXML)$/) {
    448         push @$plugout,"GAPlugout";
    449     }
    450     else{
    451         push @$plugout,$saveas."Plugout";
    452438    }
    453439    }
     
    484470    $gli = 1;
    485471    }
    486    
    487    
    488     # global plugin stuff
    489     if (defined $collectcfg->{'separate_cjk'} && $collectcfg->{'separate_cjk'} =~ /^true$/i) {
    490     push @global_opts, "-separate_cjk";
    491     }
    492    
     472    $gli = 0 unless defined $gli;
     473       
    493474    # check keepold and removeold
    494475    ($removeold, $keepold, $incremental) = &scriptutil::check_removeold_and_keepold($removeold, $keepold, $incremental, "archives", $collectcfg);
    495476 
    496     $gli = 0 unless defined $gli;
    497477
    498478    print STDERR "<Import>\n" if $gli;
     
    538518
    539519    # read the archive information file
    540 
    541     # If saveas=DSpace, a "contents" file will be created, otherwise "archives.inf"
    542    
    543     # the plugouts should be doing this!!
    544     if ($saveas eq "DSpace"){
    545     $archive_info_filename = &util::filename_cat ($archivedir, "contents");
    546     } elsif ($saveas =~ m/^.*METS$/ || $saveas eq "GA" || $saveas eq "MARC" ) {
    547     $archive_info_filename = &util::filename_cat ($archivedir, "archives.inf");
    548     }
    549 
     520    $archive_info_filename = &util::filename_cat ($archivedir, "archives.inf");
     521   
    550522    $archive_info = new arcinfo ();
    551523    $archive_info->load_info ($archive_info_filename);
     
    555527
    556528    ####Use Plugout####
     529    my ($plugout);
     530    if (defined $collectcfg->{'plugout'} && $collectcfg->{'plugout'} =~ /^(GA|GreenstoneMETS)Plugout/) {
     531    $plugout = $collectcfg->{'plugout'};
     532    }
     533    else{
     534    if ($saveas !~ /^(GA|GreenstoneMETS)$/) {
     535        push @$plugout,"GAPlugout";
     536    }
     537    else{
     538        push @$plugout,$saveas."Plugout";
     539    }
     540    }
     541
    557542    push @$plugout,("-output_info",$archive_info) if (defined $archive_info);
    558543    push @$plugout,("-verbosity",$verbosity) if (defined $verbosity);
     
    590575    }
    591576    }
    592 
    593     if ($saveas eq "FedoraMETS")
    594     {
    595     # This would be better (should?!) be done in Plugout!!
    596 
    597     # create collection "doc obj" for Fedora that contains
    598     # collection-level metadata
    599    
    600     my $doc_obj = new doc($configfilename,"nonindexed_doc");
    601     $doc_obj->set_OID("collection");
    602    
    603     my $col_name = undef;
    604     my $col_meta = $collectcfg->{'collectionmeta'};
    605     if (defined $col_meta)
    606     {
    607         store_collectionmeta($col_meta,"collectionname",$doc_obj);
    608         store_collectionmeta($col_meta,"collectioextra",$doc_obj);
    609        
    610     }
    611    
    612     $processor->process($doc_obj);
    613     }
    614 
    615577
    616578    &plugin::end($pluginfo, $processor);
Note: See TracChangeset for help on using the changeset viewer.