- Timestamp:
- 2009-02-01T14:35:22+13:00 (15 years ago)
- Location:
- gsdl/trunk/bin/script
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
gsdl/trunk/bin/script/export.pl
r17142 r18440 288 288 # other vars 289 289 my ($configfilename, $collection, $collectcfg, 290 $export_info_filename, $export_info, 290 $expinfo_doc_filename, $export_info, 291 291 $gs_mode, 292 292 $processor, $pluginfo); … … 498 498 # the plugouts should be doing this!! 499 499 if ($saveas eq "DSpace"){ 500 $export_info_filename = &util::filename_cat ($exportdir, "contents"); 500 $expinfo_doc_filename = &util::filename_cat ($exportdir, "contents"); 501 501 } elsif ($saveas =~ m/^.*METS$/ || $saveas eq "MARC" ) { 502 $export_info_filename = &util::filename_cat ($exportdir, "export.inf"); 502 ## $expinfo_doc_filename = &util::filename_cat ($exportdir, "export.inf"); 503 my $db_ext = &util::is_little_endian() ? ".ldb" : ".bdb"; 504 my $doc_db = "archiveinf-doc$db_ext"; 505 $expinfo_doc_filename = &util::filename_cat ($exportdir, $doc_db); 503 506 } 504 507 505 508 $export_info = new arcinfo(); 506 $export_info -> load_info ($export_info_filename); 509 $export_info -> load_info ($expinfo_doc_filename); 507 510 508 511 my ($plugout); … … 550 553 else { 551 554 # process any files marked for exporting 552 foreach my $file (keys %{$manifest_lookup->{'export'}}) { 555 foreach my $file (keys %{$manifest_lookup->{'index'}}) { 553 556 &plugin::read ($pluginfo, $importdir, $file, {}, {}, $processor, $maxdocs, 0, $gli); 554 557 } … … 581 584 #$processor->close_file_output() if $groupsize > 1; 582 585 $processor->close_group_output() if $processor->is_group(); 583 # why do we need this?? 584 if ($saveas =~ m/^.*METS$/) { 585 $export_info->save_info($export_info_filename); 586 587 if (($saveas =~ m/^.*METS$/) || ($saveas eq "MARC")) { 588 # Not all export types need this (e.g. DSpace) 589 $export_info->save_info($expinfo_doc_filename); 586 590 } 587 591 -
gsdl/trunk/bin/script/import.pl
r17751 r18440 287 287 # other vars 288 288 my ($configfilename, $collection, $collectcfg, 289 $archive_info_filename, $archive_info, 289 $arcinfo_doc_filename, $arcinfo_src_filename, $archive_info, 290 290 $gs_mode, 291 291 $processor, $pluginfo); … … 518 518 519 519 # read the archive information file 520 $archive_info_filename = &util::filename_cat ($archivedir, "archives.inf"); 520 ## $arcinfo_doc_filename = &util::filename_cat ($archivedir, "archives.inf"); 521 522 my $db_ext = &util::is_little_endian() ? ".ldb" : ".bdb"; 523 my $doc_db = "archiveinf-doc$db_ext"; 524 my $src_db = "archiveinf-src$db_ext"; 525 $arcinfo_doc_filename = &util::filename_cat ($archivedir, $doc_db); 526 $arcinfo_src_filename = &util::filename_cat ($archivedir, $src_db); 521 527 522 528 $archive_info = new arcinfo (); 523 $archive_info->load_info ($archive_info_filename); 529 $archive_info->load_info ($arcinfo_doc_filename); 524 530 if ($reversesort) { 525 531 $archive_info->reverse_sort(); 532 } 533 534 if ($manifest eq "") { 535 # Load in list of files in import folder from last import (if present) 536 $archive_info->load_import_filelist ($arcinfo_src_filename); 526 537 } 527 538 … … 560 571 # gobal blocking pass may set up some metadata 561 572 &plugin::file_block_read($pluginfo, $importdir, "", $block_hash, $metadata, $gli); 573 # Can now work out which files were deleted 574 575 # First convert all files to absolute form 576 # This is to support the situation where the import folder is not 577 # the default 578 579 my $prev_all_files = $archive_info->{'import_filelist'}; 580 foreach my $prev_file (keys %$prev_all_files) { 581 582 if (!&util::filename_is_absolute($prev_file)) { 583 my $full_prev_file = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},$prev_file); 584 delete $prev_all_files->{$prev_file}; 585 $prev_all_files->{$full_prev_file} = 1; 586 } 587 } 588 589 # Figure out which are the new files, existing files and so 590 # by implication the files from the previous import that 
are not 591 # there any more => mark them for deletion 592 foreach my $curr_file (keys %{$block_hash->{'all_files'}}) { 593 594 my $full_curr_file = $curr_file; 595 596 if (!&util::filename_is_absolute($curr_file)) { 597 # add in import dir to make absolute 598 $full_curr_file = &util::filename_cat($importdir,$curr_file); 599 } 600 601 ## print STDERR "**** Checking $curr_file\n"; 602 603 # figure of if new file or not 604 if (defined $prev_all_files->{$full_curr_file}) { 605 # had it before 606 $block_hash->{'existing_files'}->{$curr_file} = 1; 607 # Now remove it, so by end of loop only the files 608 # that need deleting are left 609 610 delete $prev_all_files->{$full_curr_file} 611 } 612 else { 613 $block_hash->{'new_files'}->{$curr_file} = 1; 614 } 615 616 delete $block_hash->{'all_files'}->{$curr_file}; 617 } 618 619 print STDERR "Delete files:\n "; 620 621 my @delete_files = keys %$prev_all_files; 622 print STDERR join("\n ",@delete_files), "\n"; 623 562 624 &plugin::read ($pluginfo, $importdir, "", $block_hash, $metadata, $processor, $maxdocs, 0, $gli); 563 625 } … … 565 627 { 566 628 # process any files marked for importing 567 foreach my $file (keys %{$manifest_lookup->{'import'}}) { 629 foreach my $file (keys %{$manifest_lookup->{'index'}}) { 568 630 &plugin::read ($pluginfo, $importdir, $file, {}, {}, $processor, $maxdocs, 0, $gli); 569 631 } … … 571 633 # record files marked for deletion in arcinfo 572 634 foreach my $file (keys %{$manifest_lookup->{'delete'}}) { 573 # consider finding it? 
635 # use 'archiveinf-src' GDBM file to look up all the OIDs 636 # this file is used in (note in most cases, it's just one OID) 637 638 # An improvement would be to have the record read 639 # into a hash array 640 # gdbmRecordToHash 641 642 my $gdbm_val 643 = &GDBMUtil::gdbmDatabaseGet($arcinfo_src_filename,$file); 644 645 my @oids = ($gdbm_val =~ m/^<oid>(.*)$/gm); 646 foreach my $oid (@oids) { 647 648 # find out if it's an assoc file or main doc 649 650 # archiveinf-doc, lookup $oid 651 # if "doc-file" 652 # mark it for deletion 653 654 # else (assoc file) 655 # mark all for re-indexing 656 657 # Now delete file 658 } 659 574 660 # $archive_info->add_info($OID,$doc_xml_file,"D"); 575 661 } … … 585 671 586 672 # The following 'if' statement is in the export.pl version of the script, 587 # but not (so far) the import.pl version. Why is this? 588 ## if ($saveas =~ m/^.*METS$/) { 673 # The reason for the 'if' statement is now given in export.pl 674 # Unclear at this point if the same should be done here 675 ## if (($saveas =~ m/^.*METS$/) || ($saveas eq "MARC")) { 676 # Not all export types need this (e.g. DSpace) 677 589 678 # should we still do this in debug mode?? 590 679 591 $archive_info->save_info($archive_info_filename); 680 # for backwards compatability with archvies.inf file 681 if ($arcinfo_doc_filename =~ m/\.inf$/) { 682 $archive_info->save_info($arcinfo_doc_filename); 683 } 684 592 685 ## } 593 686 -
gsdl/trunk/bin/script/lucene_passes.pl
r16264 r18440 131 131 $doc_xml = ""; 132 132 $output_filename = ""; 133 } 134 elsif ($line =~ m/<\/Delete>\s*$/) { 135 if ($mode eq "index") { 136 $doc_xml =~ s/\n+/\n/g; 137 138 # notify lucene indexer 139 print PIPEOUT "$doc_xml"; 140 } 141 $doc_xml = ""; 133 142 } 134 143 }
Note:
See TracChangeset
for help on using the changeset viewer.