Changeset 20616
- Timestamp: 2009-09-16T15:55:57+12:00 (15 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gsdl/trunk/bin/script/export.pl
r20571 r20616 27 27 28 28 29 # This program will export a particular collection into a specific Format (e.g. METS or DSpace) 29 # This program will export a particular collection into a specific Format (e.g. METS or DSpace) by importing then saving as a different format. 30 30 31 31 package export; … … 36 36 unshift (@INC, "$ENV{'GSDLHOME'}/perllib"); 37 37 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan"); 38 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8"); 38 39 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/plugins"); 39 40 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/plugouts"); … … 61 62 use plugout; 62 63 use manifest; 64 use inexport; 63 65 use util; 64 66 use scriptutil; … … 143 145 'reqd' => "no", 144 146 'hiddengli' => "yes" }, 145 { 'name' => "listall",146 'desc' => "{export.listall}",147 'type' => "flag",148 'reqd' => "no" },149 147 { 'name' => "debug", 150 148 'desc' => "{export.debug}", … … 159 157 'modegli' => "3" }, 160 158 # does this make sense? 161 #{ 'name' => "incremental",162 #'desc' => "{import.incremental}",163 #'type' => "flag",164 #'hiddengli' => "yes" },159 { 'name' => "incremental", 160 'desc' => "{import.incremental}", 161 'type' => "flag", 162 'hiddengli' => "yes" }, 165 163 { 'name' => "keepold", 166 164 'desc' => "{export.keepold}", … … 172 170 'type' => "flag", 173 171 'reqd' => "no", 174 ' modegli' => "3" },172 'hiddengli' => "yes" }, 175 173 { 'name' => "language", 176 174 'desc' => "{scripts.language}", 177 175 'type' => "string", 178 176 'reqd' => "no", 179 ' modegli' => "3" },177 'hiddengli' => "yes" }, 180 178 { 'name' => "maxdocs", 181 179 'desc' => "{export.maxdocs}", … … 254 252 'reqd' => "no", 255 253 'hiddengli' => "yes" }, 254 { 'name' => "listall", 255 'desc' => "{export.listall}", 256 'type' => "flag", 257 'reqd' => "no" }, 256 258 { 'name' => "xml", 257 259 'desc' => "{scripts.xml}", … … 280 282 $OIDtype, $OIDmetadata, 281 283 $maxdocs, $statsfile, 284 $gzip, 282 285 $out, $faillog, $gli, $listall, 283 286 # plugout 
specific ones … … 289 292 # other vars 290 293 my ($configfilename, $collection, $collectcfg, 291 $expinfo_doc_filename, $exp ort_info,294 $expinfo_doc_filename, $expinfo_src_filename, $export_info, 292 295 $gs_mode, 293 296 $processor, $pluginfo); … … 311 314 } 312 315 313 314 # these are options used by other things - we just set default values 315 # undef means will be set from config file if there 316 my $gzip = undef; 317 316 318 317 # If $language has been specified, load the appropriate resource bundle 319 318 # (Otherwise, the default resource bundle will be loaded automatically) … … 420 419 } 421 420 421 if (defined $collectcfg->{'manifest'} && $manifest eq "") { 422 $manifest = $collectcfg->{'manifest'}; 423 } 422 424 if (defined $collectcfg->{'gzip'} && !$gzip) { 423 425 if ($collectcfg->{'gzip'} =~ /^true$/i) { … … 433 435 } 434 436 437 # groupsize is in import - does it make sense here?? 438 435 439 if (!defined $OIDtype || ($OIDtype !~ /^(hash|incremental|assigned|dirname)$/)) { 436 440 if (defined $collectcfg->{'OIDtype'} && $collectcfg->{'OIDtype'} =~ /^(hash|incremental|assigned|dirname)$/) { … … 441 445 } 442 446 447 if ((!defined $OIDmetadata) || ($OIDmetadata eq "")) { 448 if (defined $collectcfg->{'OIDmetadata'}) { 449 $OIDmetadata = $collectcfg->{'OIDmetadata'}; 450 } else { 451 $OIDmetadata = "dc.Identifier"; # the default 452 } 453 } 454 443 455 if (defined $collectcfg->{'debug'} && $collectcfg->{'debug'} =~ /^true$/i) { 444 456 $debug = 1; … … 496 508 } elsif ($saveas =~ m/^.*METS$/ || $saveas eq "MARCXML" ) { 497 509 ## $expinfo_doc_filename = &util::filename_cat ($exportdir, "export.inf"); 498 my $doc_db = "archiveinf-doc"; 499 $expinfo_doc_filename = &util::filename_cat ($exportdir, $doc_db); 510 $expinfo_doc_filename = &util::filename_cat ($exportdir,"archiveinf-doc" ); 500 511 &util::rename_gdbm_file($expinfo_doc_filename); # ensures gdb in case we have an existing legacy ldb one - can this happen? 
501 512 $expinfo_doc_filename .= ".gdb"; 513 514 $expinfo_src_filename = &util::filename_cat ($exportdir,"archiveinf-src" ); 515 &util::rename_gdbm_file($expinfo_src_filename); # ensures gdb in case we have an existing legacy ldb one - can this happen? 516 $expinfo_src_filename .= ".gdb"; 502 517 503 518 } … … 506 521 $export_info -> load_info ($expinfo_doc_filename); 507 522 523 if ($manifest eq "") { 524 # Load in list of files in export folder from last export (if present) 525 $export_info->load_prev_import_filelist ($expinfo_src_filename); 526 } 527 508 528 my ($plugout); 509 529 if (defined $collectcfg->{'plugout'} && $collectcfg->{'plugout'} =~ /^(.*METS|DSpace|MARCXML)Plugout/) { … … 511 531 } 512 532 else{ 513 if ($saveas !~ /^( .*METS|DSpace|MARCXML)$/) {533 if ($saveas !~ /^(GreenstoneMETS|FedoraMETS|DSpace|MARCXML)$/) { 514 534 push @$plugout,"GreenstoneMETSPlugout"; 515 535 } … … 524 544 push @$plugout,("-verbosity",$verbosity) if (defined $verbosity); 525 545 push @$plugout,("-debug") if ($debug); 526 push @$plugout,("-gzip_output" ,$gzip) if (defined$gzip);546 push @$plugout,("-gzip_output") if ($gzip); 527 547 push @$plugout,("-output_handle",$out) if (defined $out); 528 548 push @$plugout,("-xslt_file",$xsltfile) if (defined $xsltfile && $xsltfile ne ""); … … 546 566 # gobal blocking pass may set up some metadata 547 567 &plugin::file_block_read($pluginfo, $importdir, "", $block_hash, $metadata, $gli); 548 &plugin::read ($pluginfo, $importdir, "", $block_hash, $metadata, $processor, $maxdocs, 0, $gli); 568 #&plugin::read ($pluginfo, $importdir, "", $block_hash, $metadata, $processor, $maxdocs, 0, $gli); 569 ### section below copied from import.pl 570 if ($incremental) { 571 # equivalent to saying ($keepold && ($incremental_mode eq "all")) 572 573 &inexport::prime_doc_oid_count($exportdir); 574 575 576 # Can now work out which files were new, already existed, and have 577 # been deleted 578 579 
&inexport::new_vs_old_import_diff($export_info,$block_hash,$importdir, 580 $exportdir,$verbosity,$incremental_mode); 581 582 my @deleted_files = sort keys %{$block_hash->{'deleted_files'}}; 583 # Filter out any in gsdl/tmp area 584 my @filtered_deleted_files = (); 585 my $gsdl_tmp_area = &util::filename_cat($ENV{'GSDLHOME'}, "tmp"); 586 my $collect_tmp_area = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "tmp"); 587 $gsdl_tmp_area = &util::filename_to_regex($gsdl_tmp_area); 588 $collect_tmp_area = &util::filename_to_regex($collect_tmp_area); 589 590 591 foreach my $df (@deleted_files) { 592 next if ($df =~ m/^$gsdl_tmp_area/); 593 next if ($df =~ m/^$collect_tmp_area/); 594 595 push(@filtered_deleted_files,$df); 596 } 597 598 599 @deleted_files = @filtered_deleted_files; 600 601 if (scalar(@deleted_files>0)) { 602 print STDERR "Files deleted since last import:\n "; 603 print STDERR join("\n ",@deleted_files), "\n"; 604 } 605 606 my @new_files = sort keys %{$block_hash->{'new_files'}}; 607 if (scalar(@new_files>0)) { 608 print STDERR "New files since last import:\n "; 609 print STDERR join("\n ",@new_files), "\n"; 610 } 611 612 &inexport::mark_docs_for_deletion($export_info,$block_hash,\@deleted_files, 613 $exportdir,$verbosity); 614 615 &inexport::mark_docs_for_reindex($export_info,$block_hash, 616 $exportdir,$verbosity); 617 618 my @reindex_files = sort keys %{$block_hash->{'reindex_files'}}; 619 620 if (scalar(@reindex_files>0)) { 621 print STDERR "Files to reindex since last import:\n "; 622 print STDERR join("\n ",@reindex_files), "\n"; 623 } 624 625 626 # not sure if the following will work -- will the metadata data-structure be correctly initialized 627 # in the right order? 
628 # foreach my $file (@new_files, @reindex_files) { 629 # &plugin::read ($pluginfo, $importdir, $file, $block_hash, $metadata, $processor, $maxdocs, 0, $gli); 630 # } 631 632 633 # Play it safe, and run through the entire folder, only processing new or edited files 634 &plugin::read ($pluginfo, $importdir, "", $block_hash, $metadata, $processor, $maxdocs, 0, $gli); 635 636 } 637 else { 638 &plugin::read ($pluginfo, $importdir, "", $block_hash, $metadata, $processor, $maxdocs, 0, $gli); 639 } 640 641 ### end copy 549 642 } 550 643 else { … … 553 646 &plugin::read ($pluginfo, $importdir, $file, {}, {}, $processor, $maxdocs, 0, $gli); 554 647 } 648 649 my @deleted_files = keys %{$manifest_lookup->{'delete'}}; 650 651 &inexport::mark_docs_for_deletion($export_info,{},\@deleted_files,$exportdir); 652 555 653 } 556 654 … … 578 676 &plugin::deinit($pluginfo, $processor); 579 677 678 # Store the value of OIDCount (used in doc.pm) so it can be 679 # restored correctly to this value on an incremental build 680 &inexport::store_doc_oid_count($exportdir); 681 580 682 # write out the export information file 581 683 #$processor->close_file_output() if $groupsize > 1; … … 587 689 ## $export_info->save_info($expinfo_doc_filename); 588 690 # } 589 590 my $expinfo_src_filename = &util::filename_cat ($exportdir, "archiveinf-src");591 &util::rename_gdbm_file($expinfo_src_filename); # ensures gdb592 $expinfo_src_filename .= ".gdb";593 691 594 692
Note: See TracChangeset for help on using the changeset viewer.