Changeset 17142
- Timestamp:
- 2008-09-04T14:27:27+12:00 (16 years ago)
- Location:
- gsdl/trunk/bin/script
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
gsdl/trunk/bin/script/export.pl
r16425 r17142 8 8 # University of Waikato, New Zealand. 9 9 # 10 # Copyright (C) 1999New Zealand Digital Library Project10 # Copyright (C) 2004 New Zealand Digital Library Project 11 11 # 12 12 # This program is free software; you can redistribute it and/or modify … … 28 28 29 29 # This program will export a particular collection into a specific Format (e.g. METS or DSpace) 30 # Author: Chi-Yu Huang Date: 08-10-200431 30 32 31 package export; … … 39 38 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/plugins"); 40 39 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/plugouts"); 41 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/classify");42 40 43 41 if (defined $ENV{'GSDLEXTS'}) { … … 48 46 unshift (@INC, "$ext_prefix/perllib"); 49 47 unshift (@INC, "$ext_prefix/perllib/cpan"); 48 unshift (@INC, "$ext_prefix/perllib/plugins"); 49 unshift (@INC, "$ext_prefix/perllib/plugouts"); 50 50 } 51 51 } … … 79 79 'desc' => "{import.OIDtype.dirname}" } ]; 80 80 81 # ** define to use the METS format or DSpace format81 # what format to export as 82 82 my $saveas_list = 83 [ { 'name' => "DSpace", 84 'desc' => "{export.saveas.DSpace}" }, 85 { 'name' => "GreenstoneMETS", 83 [ { 'name' => "GreenstoneMETS", 86 84 'desc' => "{export.saveas.GreenstoneMETS}"}, 87 85 { 'name' => "FedoraMETS", 88 86 'desc' => "{export.saveas.FedoraMETS}"}, 89 { 'name' => "GA",90 'desc' => "{export.saveas.GA}"},91 87 { 'name' => "MARCXML", 92 'desc' => "{export.saveas.MARCXML}"} 88 'desc' => "{export.saveas.MARCXML}"}, 89 { 'name' => "DSpace", 90 'desc' => "{export.saveas.DSpace}" } 93 91 ]; 94 92 … … 107 105 'type' => "enum", 108 106 'list' => $saveas_list, 109 'deft' => " METS",107 'deft' => "GreenstoneMETS", 110 108 'reqd' => "no", 111 109 'modegli' => "3" }; … … 125 123 'reqd' => "no", 126 124 'hiddengli' => "yes" }, 127 { 'name' => "site",128 'desc' => "{import.site}",129 'type' => "string",130 'deft' => "",131 'reqd' => "no",132 'hiddengli' => "yes" },133 125 { 'name' => "collectdir", 134 126 'desc' => "{export.collectdir}", … … 139 131 'reqd' => "no", 140 132 'hiddengli' => "yes" }, 141 { 'name' => "manifest", 133 { 'name' => "site", 134 'desc' => "{import.site}", 135 'type' => "string", 136 'deft' => "", 137 'reqd' => "no", 138 'hiddengli' => "yes" }, 139 { 'name' => "manifest", 142 140 'desc' => "{import.manifest}", 143 141 'type' => "string", … … 160 158 'reqd' => "no", 161 159 'modegli' => "4" }, 162 { 'name' => "incremental", 163 'desc' => "{import.incremental}", 164 'type' => "flag", 165 'hiddengli' => "yes" }, 160 # does this make sense? 161 # { 'name' => "incremental", 162 # 'desc' => "{import.incremental}", 163 # 'type' => "flag", 164 # 'hiddengli' => "yes" }, 166 165 { 'name' => "keepold", 167 166 'desc' => "{export.keepold}", … … 250 249 'modegli' => "4" }, 251 250 { 'name' => "gli", 252 'desc' => " ",251 'desc' => "{scripts.gli}", 253 252 'type' => "flag", 254 253 'reqd' => "no", … … 269 268 'args' => [ $saveas_argument ] }; 270 269 271 #sub gsprintf 272 #{ 273 # return &gsprintf::gsprintf(@_); 274 #} 270 271 &main(); 272 273 sub main { 274 # params 275 my ($language, $verbosity, $debug, 276 $collectdir, $importdir, $exportdir, $site, $manifest, 277 $incremental, $keepold, $removeold, 278 $saveas, 279 $OIDtype, $OIDmetadata, 280 $maxdocs, $statsfile, 281 $out, $faillog, $gli, $listall, 282 # plugout specific ones 283 $mapping_file, $xsltfile, 284 $xslt_mets, $xslt_txt, $fedora_namespace, $group_marc); 285 286 my $xml = 0; 287 288 # other vars 289 my ($configfilename, $collection, $collectcfg, 290 $export_info_filename, $export_info, 291 $gs_mode, 292 $processor, $pluginfo); 293 294 my $service = "export"; 295 296 my $hashParsingResult = {}; 297 # general options available to all plugins 298 my $intArgLeftinAfterParsing = parse2::parse(\@ARGV,$arguments,$hashParsingResult,"allow_extra_options"); 299 300 # If parse returns -1 then something has gone wrong 301 if ($intArgLeftinAfterParsing == -1) 302 { 303 &PrintUsage::print_txt_usage($options, "{export.params}"); 304 die "\n"; 305 } 306 307 foreach my $strVariable (keys %$hashParsingResult) 308 { 309 eval "\$$strVariable = \$hashParsingResult->{\"\$strVariable\"}"; 310 } 311 312 313 # these are options used by other things - we just set default values 314 # undef means will be set from config file if there 315 my $gzip = undef; 316 317 # If $language has been specified, load the appropriate resource bundle 318 # (Otherwise, the default resource bundle will be loaded automatically) 319 if ($language && $language =~ /\S/) { 320 &gsprintf::load_language_specific_resource_bundle($language); 321 } 322 323 if ($listall) { 324 if ($xml) { 325 &PrintUsage::print_xml_usage($listall_options); 326 } 327 else 328 { 329 &PrintUsage::print_txt_usage($listall_options,"{export.params}"); 330 } 331 die "\n"; 332 } 333 334 if ($xml) { 335 &PrintUsage::print_xml_usage($options); 336 die "\n"; 337 } 338 339 if ($gli) { # the gli wants strings to be in UTF-8 340 &gsprintf::output_strings_in_UTF8; 341 } 342 343 # now check that we had exactly one leftover arg, which should be 344 # the collection name. We don't want to do this earlier, cos 345 # -xml arg doesn't need a collection name 346 # Or if the user specified -h, then we output the usage also 347 if ($intArgLeftinAfterParsing != 1 || (@ARGV && $ARGV[0] =~ /^\-+h/)) 348 { 349 &PrintUsage::print_txt_usage($options, "{export.params}"); 350 die "\n"; 351 } 352 353 my $close_out = 0; 354 if ($out !~ /^(STDERR|STDOUT)$/i) { 355 open (OUT, ">$out") || 356 (&gsprintf(STDERR, "{common.cannot_open_output_file}\n", $out) && die); 357 $out = 'export::OUT'; 358 $close_out = 1; 359 } 360 $out->autoflush(1); 361 362 # get and check the collection name 363 if (($collection = &colcfg::use_collection($site, @ARGV, $collectdir)) eq "") { 364 &PrintUsage::print_txt_usage($options, "{export.params}"); 365 die "\n"; 366 } 367 # add collection's perllib dir into include path in 368 # case we have collection specific modules 369 unshift (@INC, "$ENV{'GSDLCOLLECTDIR'}/perllib"); 370 371 # check that we can open the faillog 372 if ($faillog eq "") { 373 $faillog = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "etc", "fail.log"); 374 } 375 open (FAILLOG, ">$faillog") || 376 (&gsprintf(STDERR, "{export.cannot_open_fail_log}\n", $faillog) && die); 377 my $faillogname = $faillog; 378 $faillog = 'export::FAILLOG'; 379 $faillog->autoflush(1); 380 381 # Read in the collection configuration file. 382 ($configfilename, $gs_mode) = &colcfg::get_collect_cfg_name($out); 383 384 385 # Read in the collection configuration file. 386 ($configfilename, $gs_mode) = &colcfg::get_collect_cfg_name($out); 387 388 if ($gs_mode eq "gs2") { 389 $collectcfg = &colcfg::read_collect_cfg ($configfilename); 390 } elsif ($gs_mode eq "gs3") { 391 $collectcfg = &colcfg::read_collection_cfg_xml ($configfilename); 392 } 393 394 if (defined $collectcfg->{'importdir'} && $importdir eq "") { 395 $importdir = $collectcfg->{'importdir'}; 396 } 397 if (defined $collectcfg->{'exportdir'} && $exportdir eq "") { 398 $exportdir = $collectcfg->{'exportdir'}; 399 } 400 401 # fill in the default import and export directories if none 402 # were supplied, turn all \ into / and remove trailing / 403 $importdir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "import") if $importdir eq ""; 404 $importdir =~ s/[\\\/]+/\//g; 405 $importdir =~ s/\/$//; 406 if (!-e $importdir) { 407 &gsprintf($out, "{import.no_import_dir}\n\n", $importdir); 408 die "\n"; 409 } 410 411 $exportdir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "export") if $exportdir eq ""; 412 $exportdir =~ s/[\\\/]+/\//g; 413 $exportdir =~ s/\/$//; 414 415 my $plugins = []; 416 if (defined $collectcfg->{'plugin'}) { 417 $plugins = $collectcfg->{'plugin'}; 418 } 419 # some global options for the plugins 420 my @global_opts = (); 421 422 if ($verbosity !~ /\d+/) { 423 if (defined $collectcfg->{'verbosity'} && $collectcfg->{'verbosity'} =~ /\d+/) { 424 $verbosity = $collectcfg->{'verbosity'}; 425 } else { 426 $verbosity = 2; # the default 427 } 428 } 429 430 if (defined $collectcfg->{'gzip'} && !$gzip) { 431 if ($collectcfg->{'gzip'} =~ /^true$/i) { 432 $gzip = 1; 433 } 434 } 435 if ($maxdocs !~ /\-?\d+/) { 436 if (defined $collectcfg->{'maxdocs'} && $collectcfg->{'maxdocs'} =~ /\-?\d+/) { 437 $maxdocs = $collectcfg->{'maxdocs'}; 438 } else { 439 $maxdocs = -1; # the default 440 } 441 } 442 443 if (!defined $OIDtype || ($OIDtype !~ /^(hash|incremental|assigned|dirname)$/)) { 444 if (defined $collectcfg->{'OIDtype'} && $collectcfg->{'OIDtype'} =~ /^(hash|incremental|assigned|dirname)$/) { 445 $OIDtype = $collectcfg->{'OIDtype'}; 446 } else { 447 $OIDtype = "hash"; # the default 448 } 449 } 450 451 if (defined $collectcfg->{'debug'} && $collectcfg->{'debug'} =~ /^true$/i) { 452 $debug = 1; 453 } 454 if (defined $collectcfg->{'gli'} && $collectcfg->{'gli'} =~ /^true$/i) { 455 $gli = 1; 456 } 457 $gli = 0 unless defined $gli; 458 459 # check keepold and removeold 460 ($removeold, $keepold, $incremental) = &scriptutil::check_removeold_and_keepold($removeold, $keepold, $incremental, "export", $collectcfg); 461 462 print STDERR "<export>\n" if $gli; 463 464 my $manifest_lookup = new manifest(); 465 if ($manifest ne "") { 466 my $manifest_filename = $manifest; 467 468 if ($manifest_filename !~ m/^[\\\/]/) { 469 $manifest_filename = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, $manifest_filename); 470 } 471 472 $manifest =~ s/[\\\/]+/\//g; 473 $manifest =~ s/\/$//; 474 475 $manifest_lookup->parse($manifest_filename); 476 } 477 478 # load all the plugins 479 $pluginfo = &plugin::load_plugins ($plugins, $verbosity, $out, $faillog, \@global_opts); 480 481 if (scalar(@$pluginfo) == 0) { 482 &gsprintf($out, "{import.no_plugins_loaded}\n"); 483 die "\n"; 484 } 485 486 # remove the old contents of the export directory if needed 487 if ($removeold && -e $exportdir) { 488 &gsprintf($out, "{export.removing_export}\n"); 489 &util::rm_r ($exportdir); 490 } 491 492 # create the export dir if needed 493 &util::mk_all_dir($exportdir); 494 495 # read the export information file 496 # If saveas=DSpace, a "contents" file will be created, otherwise "export.inf" 497 498 # the plugouts should be doing this!! 499 if ($saveas eq "DSpace"){ 500 $export_info_filename = &util::filename_cat ($exportdir, "contents"); 501 } elsif ($saveas =~ m/^.*METS$/ || $saveas eq "MARC" ) { 502 $export_info_filename = &util::filename_cat ($exportdir, "export.inf"); 503 } 504 505 $export_info = new arcinfo(); 506 $export_info -> load_info ($export_info_filename); 507 508 my ($plugout); 509 if (defined $collectcfg->{'plugout'} && $collectcfg->{'plugout'} =~ /^(.*METS|DSpace|MARCXML)Plugout/) { 510 $plugout = $collectcfg->{'plugout'}; 511 } 512 else{ 513 if ($saveas !~ /^(.*METS|DSpace|MARCXML)$/) { 514 push @$plugout,"GreenstoneMETSPlugout"; 515 } 516 else{ 517 push @$plugout,$saveas."Plugout"; 518 } 519 } 520 521 my $plugout_name = $plugout->[0]; 522 523 push @$plugout,("-output_info",$export_info) if (defined $export_info); 524 push @$plugout,("-verbosity",$verbosity) if (defined $verbosity); 525 push @$plugout,("-debug") if ($debug); 526 push @$plugout,("-gzip_output",$gzip) if (defined $gzip); 527 push @$plugout,("-output_handle",$out) if (defined $out); 528 push @$plugout,("-xslt_file",$xsltfile) if (defined $xsltfile); 529 push @$plugout,("-group") if ($group_marc && $plugout_name =~ m/^MARCXMLPlugout$/); 530 push @$plugout,("-mapping_file",$mapping_file) if (defined $mapping_file && $plugout_name =~ m/^MARCXMLPlugout$/); 531 push @$plugout,("-xslt_mets",$xslt_mets) if (defined $xslt_mets && $plugout_name =~ m/^.*METSPlugout$/); 532 push @$plugout,("-xslt_txt",$xslt_txt) if (defined $xslt_txt && $plugout_name =~ m/^.*METSPlugout$/); 533 push @$plugout,("-fedora_namespace",$fedora_namespace) if (defined $fedora_namespace && $plugout_name eq "FedoraMETSPlugout"); 534 535 $processor = &plugout::load_plugout($plugout); 536 $processor->setoutputdir ($exportdir); 537 538 $processor->set_OIDtype ($OIDtype, $OIDmetadata); 539 540 &plugin::begin($pluginfo, $importdir, $processor, $maxdocs, $gli); 541 542 if ($manifest eq "") { 543 # process the import directory 544 my $block_hash = {}; 545 my $metadata = {}; 546 # gobal blocking pass may set up some metadata 547 &plugin::file_block_read($pluginfo, $importdir, "", $block_hash, $metadata, $gli); 548 &plugin::read ($pluginfo, $importdir, "", $block_hash, $metadata, $processor, $maxdocs, 0, $gli); 549 } 550 else { 551 # process any files marked for exporting 552 foreach my $file (keys %{$manifest_lookup->{'export'}}) { 553 &plugin::read ($pluginfo, $importdir, $file, {}, {}, $processor, $maxdocs, 0, $gli); 554 } 555 } 556 557 if ($saveas eq "FedoraMETS") { 558 # create collection "doc obj" for Fedora that contains 559 # collection-level metadata 560 561 my $doc_obj = new doc($configfilename,"nonindexed_doc"); 562 $doc_obj->set_OID("collection"); 563 564 my $col_name = undef; 565 my $col_meta = $collectcfg->{'collectionmeta'}; 566 567 if (defined $col_meta) { 568 569 store_collectionmeta($col_meta,"collectionname",$doc_obj); # in GS3 this is a collection's name 570 store_collectionmeta($col_meta,"collectionextra",$doc_obj); # in GS3 this is a collection's description 571 572 } 573 $processor->process($doc_obj); 574 } 575 576 &plugin::end($pluginfo, $processor); 577 578 &plugin::deinit($pluginfo, $processor); 579 580 # write out the export information file 581 #$processor->close_file_output() if $groupsize > 1; 582 $processor->close_group_output() if $processor->is_group(); 583 # why do we need this?? 584 if ($saveas =~ m/^.*METS$/) { 585 $export_info->save_info($export_info_filename); 586 } 587 588 # write out export stats 589 my $close_stats = 0; 590 if ($statsfile !~ /^(STDERR|STDOUT)$/i) { 591 if (open (STATS, ">$statsfile")) { 592 $statsfile = 'import::STATS'; 593 $close_stats = 1; 594 } else { 595 &gsprintf($out, "{import.cannot_open_stats_file}", $statsfile); 596 &gsprintf($out, "{import.stats_backup}\n"); 597 $statsfile = 'STDERR'; 598 } 599 } 600 601 &gsprintf($out, "\n"); 602 &gsprintf($out, "*********************************************\n"); 603 &gsprintf($out, "{export.complete}\n"); 604 &gsprintf($out, "*********************************************\n"); 605 606 &plugin::write_stats($pluginfo, $statsfile, $faillogname, $gli); 607 if ($close_stats) { 608 close STATS; 609 } 610 611 close OUT if $close_out; 612 close FAILLOG; 613 } 275 614 276 615 … … 314 653 315 654 316 &main(); 317 318 sub main { 319 # params 320 my ($language, $verbosity, $debug, 321 $importdir, $manifest, $incremental, $keepold, 322 $exportdir, $site, $listall, 323 $removeold, $saveas, 324 $OIDtype, $OIDmetadata, 325 $out, $faillog, $collectdir, $gli, 326 $gs_mode, $collectcfg, 327 $maxdocs, $statsfile, 328 $mapping_file, 329 $xsltfile, 330 $xslt_mets, $xslt_txt, $fedora_namespace, $group_marc); 331 332 my $xml = 0; 333 334 # other vars 335 my ($configfilename, $collection, 336 $export_info_filename, $export_info, 337 $processor, $pluginfo); 338 339 my $service = "export"; 340 341 my $hashParsingResult = {}; 342 # general options available to all plugins 343 my $intArgLeftinAfterParsing = parse2::parse(\@ARGV,$arguments,$hashParsingResult,"allow_extra_options"); 344 345 # If parse returns -1 then something has gone wrong 346 if ($intArgLeftinAfterParsing == -1) 347 { 348 &PrintUsage::print_txt_usage($options, "{export.params}"); 349 die "\n"; 350 } 351 352 foreach my $strVariable (keys %$hashParsingResult) 353 { 354 eval "\$$strVariable = \$hashParsingResult->{\"\$strVariable\"}"; 355 } 356 357 358 # these are options used by other things - we just set default values 359 # undef means will be set from config file if there 360 my $gzip = undef; 361 my $groupsize = 1; 362 #my $OIDtype = undef; 363 my $sortmeta = undef; 364 365 my $explicit_exportdir = (defined $exportdir) ? 1 : 0; 366 367 # save these command line settings. don't want config file settings in one 368 # coll used for other colls 369 # does this apply to other vars??? 370 my $global_removeold = $removeold; 371 my $global_keepold = $keepold; 372 # If $language has been specified, load the appropriate resource bundle 373 # (Otherwise, the default resource bundle will be loaded automatically) 374 if ($language) { 375 &gsprintf::load_language_specific_resource_bundle($language); 376 } 377 378 if ($listall) { 379 if ($xml) { 380 &PrintUsage::print_xml_usage($listall_options); 381 } 382 else 383 { 384 &PrintUsage::print_txt_usage($listall_options,"{export.params}"); 385 } 386 die "\n"; 387 } 388 elsif ($xml) { 389 &PrintUsage::print_xml_usage($options); 390 die "\n"; 391 } 392 393 # can have more than one collection name, 394 # if the first extra option is -h, then output the help 395 if (scalar(@ARGV) == 0 || (@ARGV && $ARGV[0] =~ /^\-+h/)) { 396 &PrintUsage::print_txt_usage($options, "{export.params}"); 397 die "\n"; 398 } 399 400 if ($gli) { # the gli wants strings to be in UTF-8 401 &gsprintf::output_strings_in_UTF8; 402 } 403 my $close_out = 0; 404 if ($out !~ /^(STDERR|STDOUT)$/i) { 405 open (OUT, ">$out") || 406 (&gsprintf(STDERR, "{common.cannot_open_output_file}\n", $out) && die); 407 $out = 'export::OUT'; 408 $close_out = 1; 409 } 410 $out->autoflush(1); 411 412 while (scalar(@ARGV)>0) { 413 my $collect_name = shift @ARGV; 414 $ENV{'GSDLCOLLECTION'} = $collect_name; 415 416 eval { 417 # get and check the collection name 418 if (($collection = &colcfg::use_collection($site, $collect_name, $collectdir)) eq "") { 419 &PrintUsage::print_txt_usage($options, "{export.params}"); 420 die "\n"; 421 } 422 # add collection's perllib dir into include path in 423 # case we have collection specific modules 424 unshift (@INC, "$ENV{'GSDLCOLLECTDIR'}/perllib"); 425 426 if ($faillog eq "") { 427 $faillog = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "etc", "fail.log"); 428 } 429 open (FAILLOG, ">$faillog") || 430 (&gsprintf(STDERR, "{export.cannot_open_fail_log}\n", $faillog) && die); 431 my $faillogname = $faillog; 432 $faillog = 'export::FAILLOG'; 433 $faillog->autoflush(1); 434 435 # check sortmeta 436 $sortmeta = undef unless defined $sortmeta && $sortmeta =~ /\S/; 437 if (defined $sortmeta && $groupsize > 1) { 438 &gsprintf($out, "{export.cannot_sort}\n\n"); 439 $sortmeta = undef; 440 } 441 442 # get the list of plugins for this collection and set any options that 443 # were specified in the collect.cfg (all export.pl options except 444 # -collectdir, -out and -faillog may be specified in the collect.cfg (these 445 # options must be known before we read the collect.cfg)) 446 my $plugins = []; 447 my @global_opts = (); 448 449 # Read in the collection configuration file. 450 ($configfilename, $gs_mode) = &colcfg::get_collect_cfg_name($out); 451 452 if ($gs_mode eq "gs2") { 453 $collectcfg = &colcfg::read_collect_cfg ($configfilename); 454 } elsif ($gs_mode eq "gs3") { 455 $collectcfg = &colcfg::read_collection_cfg_xml ($configfilename); 456 } 457 458 if (defined $collectcfg->{'importdir'} && $importdir eq "") { 459 $importdir = $collectcfg->{'importdir'}; 460 } 461 if (defined $collectcfg->{'exportdir'} && $exportdir eq "") { 462 $exportdir = $collectcfg->{'exportdir'}; 463 } 464 465 # fill in the default import and export directories if none 466 # were supplied, turn all \ into / and remove trailing / 467 $importdir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "import") if $importdir eq ""; 468 $importdir =~ s/[\\\/]+/\//g; 469 $importdir =~ s/\/$//; 470 if (!-e $importdir) { 471 &gsprintf($out, "{import.no_import_dir}\n\n", $importdir); 472 die "\n"; 473 } 474 475 $exportdir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "export") if $exportdir eq ""; 476 $exportdir =~ s/[\\\/]+/\//g; 477 $exportdir =~ s/\/$//; 478 479 if (defined $collectcfg->{'plugin'}) { 480 $plugins = $collectcfg->{'plugin'}; 481 } 482 483 if ($verbosity !~ /\d+/) { 484 if (defined $collectcfg->{'verbosity'} && $collectcfg->{'verbosity'} =~ /\d+/) { 485 $verbosity = $collectcfg->{'verbosity'}; 486 } else { 487 $verbosity = 2; # the default 488 } 489 } 490 491 if (defined $collectcfg->{'gzip'} && !$gzip) { 492 if ($collectcfg->{'gzip'} =~ /^true$/i) { 493 $gzip = 1; 494 } 495 } 496 if ($maxdocs !~ /\-?\d+/) { 497 if (defined $collectcfg->{'maxdocs'} && $collectcfg->{'maxdocs'} =~ /\-?\d+/) { 498 $maxdocs = $collectcfg->{'maxdocs'}; 499 } else { 500 $maxdocs = -1; # the default 501 } 502 } 503 if ($groupsize == 1) { 504 if (defined $collectcfg->{'groupsize'} && $collectcfg->{'groupsize'} =~ /\d+/) { 505 $groupsize = $collectcfg->{'groupsize'}; 506 } 507 } 508 if (!defined $OIDtype || ($OIDtype !~ /^(hash|incremental|assigned|dirname)$/)) { 509 if (defined $collectcfg->{'OIDtype'} && $collectcfg->{'OIDtype'} =~ /^(hash|incremental)$/) { 510 $OIDtype = $collectcfg->{'OIDtype'}; 511 } else { 512 $OIDtype = "hash"; # the default 513 } 514 } 515 if (defined $collectcfg->{'sortmeta'} && $sortmeta eq "") { 516 $sortmeta = $collectcfg->{'sortmeta'}; 517 } 518 if (defined $collectcfg->{'debug'} && $collectcfg->{'debug'} =~ /^true$/i) { 519 $debug = 1; 520 } 521 if (defined $collectcfg->{'gli'} && $collectcfg->{'gli'} =~ /^true$/i) { 522 $gli = 1; 523 } 524 525 # global plugin stuff 526 if (defined $collectcfg->{'separate_cjk'}&& $collectcfg->{'separate_cjk'} =~ /^true$/i) { 527 push @global_opts, "-separate_cjk"; 528 } 529 530 ($removeold, $keepold) = &scriptutil::check_removeold_and_keepold($global_removeold, $global_keepold, 0, "export", $collectcfg); 531 532 $gli = 0 unless defined $gli; 533 534 print STDERR "<export>\n" if $gli; 535 536 # fill in the default import and export directories if none 537 # were supplied, turn all \ into / and remove trailing / 538 $importdir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "import") if $importdir eq ""; 539 $importdir =~ s/[\\\/]+/\//g; 540 $importdir =~ s/\/$//; 541 $exportdir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "export") if $exportdir eq ""; 542 $exportdir =~ s/[\\\/]+/\//g; 543 $exportdir =~ s/\/$//; 544 545 # load all the plugins 546 $pluginfo = &plugin::load_plugins ($plugins, $verbosity, $out, $faillog, \@global_opts); 547 548 if (scalar(@$pluginfo) == 0) { 549 &gsprintf($out, "{import.no_plugins_loaded}\n"); 550 die "\n"; 551 } 552 553 # remove the old contents of the export directory if needed 554 if ($removeold && -e $exportdir) { 555 &gsprintf($out, "{export.removing_export}\n"); 556 &util::rm_r ($exportdir); 557 } 558 559 # read the export information file 560 561 # Export to DSpace Archive format or METs format 562 # If saveas=DSpace, a "contents" file will be created, otherwise "export.inf" 563 564 # the plugouts should be doing this!! 565 if ($saveas eq "DSpace"){ 566 $export_info_filename = &util::filename_cat ($exportdir, "contents"); 567 } elsif ($saveas =~ m/^.*METS$/ || $saveas eq "GA" || $saveas eq "MARC" ) { 568 $export_info_filename = &util::filename_cat ($exportdir, "export.inf"); 569 } 570 571 $export_info = new arcinfo(); 572 $export_info -> load_info ($export_info_filename); 573 574 my ($plugout); 575 if (defined $collectcfg->{'plugout'}) { 576 $plugout = $collectcfg->{'plugout'}; 577 } 578 else{ 579 if ($saveas !~ /^(GA|.*METS|DSpace|MARCXML)$/) { 580 push @$plugout,"GAPlugout"; 581 } 582 else{ 583 push @$plugout,$saveas."Plugout"; 584 } 585 } 586 587 my $plugout_name = $plugout->[0]; 588 589 push @$plugout,("-output_info",$export_info) if (defined $export_info); 590 push @$plugout,("-verbosity",$verbosity) if (defined $verbosity); 591 push @$plugout,("-debug") if ($debug); 592 push @$plugout,("-gzip_output",$gzip) if (defined $gzip); 593 push @$plugout,("-group_size",$groupsize) if (defined $groupsize); 594 push @$plugout,("-output_handle",$out) if (defined $out); 595 push @$plugout,("-xslt_file",$xsltfile) if (defined $xsltfile); 596 push @$plugout,("-group") if ($group_marc && $plugout_name =~ m/^MARCXMLPlugout$/); 597 push @$plugout,("-mapping_file",$mapping_file) if (defined $mapping_file && $plugout_name =~ m/^MARCXMLPlugout$/); 598 push @$plugout,("-xslt_mets",$xslt_mets) if (defined $xslt_mets && $plugout_name =~ m/^.*METSPlugout$/); 599 push @$plugout,("-xslt_txt",$xslt_txt) if (defined $xslt_txt && $plugout_name =~ m/^.*METSPlugout$/); 600 push @$plugout,("-fedora_namespace",$fedora_namespace) if (defined $fedora_namespace && $plugout_name eq "FedoraMETSPlugout"); 601 602 $processor = &plugout::load_plugout($plugout); 603 604 $processor->setoutputdir ($exportdir); 605 606 $processor->set_sortmeta ($sortmeta) if defined $sortmeta; 607 $processor->set_OIDtype ($OIDtype, $OIDmetadata); 608 609 &plugin::begin($pluginfo, $importdir, $processor, $maxdocs); 610 611 # process the import directory 612 my $block_hash = {}; 613 my $metadata = {}; 614 # gobal blocking pass may set up some metadata 615 &plugin::file_block_read($pluginfo, $importdir, "", $block_hash, $metadata, $gli); 616 &plugin::read ($pluginfo, $importdir, "", $block_hash, $metadata, $processor, $maxdocs, 0, $gli); 617 618 619 if ($saveas eq "FedoraMETS") 620 { 621 # create collection "doc obj" for Fedora that contains 622 # collection-level metadata 623 624 my $doc_obj = new doc($configfilename,"nonindexed_doc"); 625 $doc_obj->set_OID("collection"); 626 627 my $col_name = undef; 628 my $col_meta = $collectcfg->{'collectionmeta'}; 629 630 if (defined $col_meta) 631 { 632 store_collectionmeta($col_meta,"collectionname",$doc_obj); # in GS3 this is a collection's name 633 store_collectionmeta($col_meta,"collectionextra",$doc_obj); # in GS3 this is a collection's description 634 635 } 636 $processor->process($doc_obj); 637 } 638 639 &plugin::end($pluginfo, $processor); 640 641 &plugin::deinit($pluginfo, $processor); 642 643 # write out the export information file 644 $processor->close_file_output() if $groupsize > 1; 645 $processor->close_group_output() if $processor->is_group(); 646 if ($saveas =~ m/^.*METS$/) { 647 $export_info->save_info($export_info_filename); 648 } 649 650 # write out export stats 651 my $close_stats = 0; 652 if ($statsfile !~ /^(STDERR|STDOUT)$/i) { 653 if (open (STATS, ">$statsfile")) { 654 $statsfile = 'import::STATS'; 655 $close_stats = 1; 656 } else { 657 &gsprintf($out, "{import.cannot_open_stats_file}", $statsfile); 658 &gsprintf($out, "{import.stats_backup}\n"); 659 $statsfile = 'STDERR'; 660 } 661 } 662 663 &gsprintf($out, "\n"); 664 &gsprintf($out, "*********************************************\n"); 665 666 &plugin::write_stats($pluginfo, $statsfile, $faillogname, $gli); 667 if ($close_stats) { 668 close STATS; 669 } 670 671 &gsprintf($out, "*********************************************\n"); 672 673 close OUT if $close_out; 674 675 close FAILLOG; 676 }; 677 678 if ($@) { 679 print STDERR $@; 680 } 681 682 ## $ENV{'GSDLCOLLECTION'} = undef; 683 $importdir = ""; 684 $removeold = 0 if ($explicit_exportdir); 685 686 } # while processing ARGV 687 688 &gsprintf($out, "\n"); 689 &gsprintf($out, "*********************************************\n"); 690 &gsprintf($out, "* {export.complete}\n"); 691 &gsprintf($out, "*********************************************\n"); 692 693 } 655 656 -
gsdl/trunk/bin/script/import.pl
r17038 r17142 253 253 'modegli' => "4" }, 254 254 { 'name' => "gli", 255 'desc' => " ",255 'desc' => "{scripts.gli}", 256 256 'type' => "flag", 257 257 'reqd' => "no", … … 273 273 # params 274 274 my ($language, $verbosity, $debug, 275 $importdir, $site, $manifest, $incremental, $keepold, 276 $removeold, $saveas, 275 $collectdir, $importdir, $archivedir, $site, $manifest, 276 $incremental, $keepold, $removeold, 277 $saveas, 277 278 $OIDtype, $OIDmetadata, 278 279 $maxdocs, $statsfile, 279 $out, $faillog, $collectdir, $gli, 280 281 $archivedir, 280 $out, $faillog, $gli, 282 281 $gzip, $groupsize, 283 $sortmeta, $reversesort, $removeprefix, $removesuffix ,282 $sortmeta, $reversesort, $removeprefix, $removesuffix 284 283 ); 285 284 … … 287 286 288 287 # other vars 289 my ($configfilename, $collect cfg, $collection,288 my ($configfilename, $collection, $collectcfg, 290 289 $archive_info_filename, $archive_info, 291 290 $gs_mode, … … 331 330 if ($intArgLeftinAfterParsing != 1 || (@ARGV && $ARGV[0] =~ /^\-+h/)) 332 331 { 333 &PrintUsage::print_txt_usage($options, "{ buildcol.params}");332 &PrintUsage::print_txt_usage($options, "{import.params}"); 334 333 die "\n"; 335 334 } … … 432 431 } 433 432 434 if ( $OIDtype !~ /^(hash|incremental|assigned|dirname)$/) {433 if (!defined $OIDtype || ($OIDtype !~ /^(hash|incremental|assigned|dirname)$/ )) { 435 434 if (defined $collectcfg->{'OIDtype'} && $collectcfg->{'OIDtype'} =~ /^(hash|incremental|assigned|dirname)$/) { 436 435 $OIDtype = $collectcfg->{'OIDtype'}; 437 436 } else { 438 437 $OIDtype = "hash"; # the default 439 }440 }441 442 my ($plugout);443 if (defined $collectcfg->{'plugout'}) {444 $plugout = $collectcfg->{'plugout'};445 }446 else{447 if ($saveas !~ /^(GA|.*METS|DSpace|MARCXML)$/) {448 push @$plugout,"GAPlugout";449 }450 else{451 push @$plugout,$saveas."Plugout";452 438 } 453 439 } … … 484 470 $gli = 1; 485 471 } 486 487 488 # global plugin stuff 489 if (defined $collectcfg->{'separate_cjk'} && $collectcfg->{'separate_cjk'} =~ /^true$/i) { 490 push @global_opts, "-separate_cjk"; 491 } 492 472 $gli = 0 unless defined $gli; 473 493 474 # check keepold and removeold 494 475 ($removeold, $keepold, $incremental) = &scriptutil::check_removeold_and_keepold($removeold, $keepold, $incremental, "archives", $collectcfg); 495 476 496 $gli = 0 unless defined $gli;497 477 498 478 print STDERR "<Import>\n" if $gli; … … 538 518 539 519 # read the archive information file 540 541 # If saveas=DSpace, a "contents" file will be created, otherwise "archives.inf" 542 543 # the plugouts should be doing this!! 544 if ($saveas eq "DSpace"){ 545 $archive_info_filename = &util::filename_cat ($archivedir, "contents"); 546 } elsif ($saveas =~ m/^.*METS$/ || $saveas eq "GA" || $saveas eq "MARC" ) { 547 $archive_info_filename = &util::filename_cat ($archivedir, "archives.inf"); 548 } 549 520 $archive_info_filename = &util::filename_cat ($archivedir, "archives.inf"); 521 550 522 $archive_info = new arcinfo (); 551 523 $archive_info->load_info ($archive_info_filename); … … 555 527 556 528 ####Use Plugout#### 529 my ($plugout); 530 if (defined $collectcfg->{'plugout'} && $collectcfg->{'plugout'} =~ /^(GA|GreenstoneMETS)Plugout/) { 531 $plugout = $collectcfg->{'plugout'}; 532 } 533 else{ 534 if ($saveas !~ /^(GA|GreenstoneMETS)$/) { 535 push @$plugout,"GAPlugout"; 536 } 537 else{ 538 push @$plugout,$saveas."Plugout"; 539 } 540 } 541 557 542 push @$plugout,("-output_info",$archive_info) if (defined $archive_info); 558 543 push @$plugout,("-verbosity",$verbosity) if (defined $verbosity); … … 590 575 } 591 576 } 592 593 if ($saveas eq "FedoraMETS")594 {595 # This would be better (should?!) be done in Plugout!!596 597 # create collection "doc obj" for Fedora that contains598 # collection-level metadata599 600 my $doc_obj = new doc($configfilename,"nonindexed_doc");601 $doc_obj->set_OID("collection");602 603 my $col_name = undef;604 my $col_meta = $collectcfg->{'collectionmeta'};605 if (defined $col_meta)606 {607 store_collectionmeta($col_meta,"collectionname",$doc_obj);608 store_collectionmeta($col_meta,"collectioextra",$doc_obj);609 610 }611 612 $processor->process($doc_obj);613 }614 615 577 616 578 &plugin::end($pluginfo, $processor);
Note:
See TracChangeset
for help on using the changeset viewer.