Changeset 23053
- Timestamp:
- 2010-10-06T15:39:33+13:00 (13 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/perllib/inexport.pm
r23042 r23053 375 375 376 376 my $manifest_lookup = new manifest($collectcfg->{'infodbtype'},$archivedir); 377 if ($self->{'manifest'} ne "") { 377 if ($self->{'manifest'} ne "") { 378 print STDERR "parsing manifest\n"; 378 379 my $manifest_filename = $self->{'manifest'}; 379 380 … … 396 397 } 397 398 399 my $plugin_incr_mode = $incremental_mode; 400 if ($manifest ne "") { 401 # if we have a manifest file, then we pretend we are fully incremental for plugins 402 $plugin_incr_mode = "all"; 403 } 398 404 #some global options for the plugins 399 405 my @global_opts = (); 400 406 401 my $pluginfo = &plugin::load_plugins ($plugins, $verbosity, $out, $faillog, \@global_opts, $ incremental_mode);407 my $pluginfo = &plugin::load_plugins ($plugins, $verbosity, $out, $faillog, \@global_opts, $plugin_incr_mode); 402 408 if (scalar(@$pluginfo) == 0) { 403 409 &gsprintf($out, "{import.no_plugins_loaded}\n"); … … 424 430 425 431 # read the archive information file 426 ## my $arcinfo_doc_filename = &util::filename_cat ($archivedir, "archives.inf");427 432 428 433 # BACKWARDS COMPATIBILITY: Just in case there are old .ldb/.bdb files (won't do anything for other infodbtypes) … … 509 514 &plugin::remove_all($pluginfo, $importdir, $processor, $maxdocs, $gli); 510 515 } 511 if ($manifest eq "") { 512 # process the import directory 513 my $block_hash = {}; 514 my $metadata = {}; 515 # gobal blocking pass may set up some metadata 516 &plugin::file_block_read($pluginfo, $importdir, "", $block_hash, $metadata, $gli); 516 517 # process the import directory 518 my $block_hash = {}; 519 $block_hash->{'new_files'} = {}; 520 $block_hash->{'reindex_files'} = {}; 521 my $metadata = {}; 522 523 # gobal blocking pass may set up some metadata 524 &plugin::file_block_read($pluginfo, $importdir, "", $block_hash, $metadata, $gli); 525 526 if ($manifest ne "") { 527 # 528 # 1. Process delete files first 529 # 530 531 my @deleted_files = keys %{$manifest_lookup->{'delete'}}; 532 my @full_deleted_files = (); 533 534 # ensure all filenames are absolute 535 foreach my $df (@deleted_files) { 536 #print STDERR "**delete file $df\n"; 537 my $full_df = 538 (&util::filename_is_absolute($df)) 539 ? $df 540 : &util::filename_cat($importdir,$df); 541 542 if (-d $full_df) { 543 &add_dir_contents_to_list($full_df, \@full_deleted_files); 544 } else { 545 push(@full_deleted_files,$full_df); 546 } 547 } 548 549 &plugin::remove_some($pluginfo, $collectcfg->{'infodbtype'}, $archivedir, \@full_deleted_files); 550 mark_docs_for_deletion($archive_info,{}, 551 \@full_deleted_files, 552 $archivedir, $verbosity, "delete"); 553 554 555 # 556 # 2. Now files for reindexing 557 # 558 559 my @reindex_files = keys %{$manifest_lookup->{'reindex'}}; 560 my @full_reindex_files = (); 561 562 # ensure all filenames are absolute 563 foreach my $rf (@reindex_files) { 564 my $full_rf = 565 (&util::filename_is_absolute($rf)) 566 ? $rf 567 : &util::filename_cat($importdir,$rf); 568 569 if (-d $full_rf) { 570 &add_dir_contents_to_list($full_rf, \@full_reindex_files); 571 } else { 572 push(@full_reindex_files,$full_rf); 573 } 574 } 575 576 &plugin::remove_some($pluginfo, $collectcfg->{'infodbtype'}, $archivedir, \@full_reindex_files); 577 mark_docs_for_deletion($archive_info,{},\@full_reindex_files, $archivedir,$verbosity, "reindex"); 578 579 # And now to ensure the new version of the file processed by 580 # appropriate plugin, we need to add it to block_hash reindex list 581 foreach my $full_rf (@full_reindex_files) { 582 #print STDERR "***reindex file $full_rf\n"; 583 $block_hash->{'reindex_files'}->{$full_rf} = 1; 584 } 585 586 587 # 588 # 3. Now finally any new files - add to block_hash new_files list 589 # 590 591 my @new_files = keys %{$manifest_lookup->{'index'}}; 592 my @full_new_files = (); 593 594 foreach my $nf (@new_files) { 595 #print STDERR "***index file $nf\n"; 596 # ensure filename is absolute 597 my $full_nf = 598 (&util::filename_is_absolute($nf)) 599 ? $nf 600 : &util::filename_cat($importdir,$nf); 601 602 if (-d $full_nf) { 603 &add_dir_contents_to_list($full_nf, \@full_new_files); 604 } else { 605 push(@full_new_files,$full_nf); 606 } 607 } 608 609 foreach my $f (@full_new_files) { 610 $block_hash->{'new_files'}->{$f} = 1; 611 } 612 } 613 else { 614 # if incremental, we read through the import folder to see whats changed. 517 615 518 616 if ($incremental || $incremental_mode eq "onlyadd") { 519 520 617 prime_doc_oid_count($archivedir); 521 618 … … 571 668 } 572 669 573 } 574 575 # Play it safe, and run through the entire folder, only processing new or edited files 576 577 if ((defined $jobs) && ($jobs > 1)) 578 { 579 # if jobs are set to >1, run in parallel using MPI helper 580 # [hs, 1 july 2010] 581 &ParallelInexport::farm_out_processes($jobs, $epoch, $importdir, $block_hash, 582 $self->{'collection'}, $self->{'site'}); 583 } 584 else 585 { 586 &plugin::read ($pluginfo, $importdir, "", $block_hash, $metadata, $processor, $maxdocs, 0, $gli); 587 } 588 } 589 else { 590 if ((defined $jobs) && ($jobs > 1)) 591 { 592 # if jobs are set to >1, run in parallel using MPI helper 593 # [hs, 1 july 2010] 594 &ParallelInexport::farm_out_processes($jobs, $epoch, $importdir, $block_hash, 595 $self->{'collection'}, $self->{'site'}); 596 } 597 else 598 { 599 &plugin::read ($pluginfo, $importdir, "", $block_hash, $metadata, $processor, $maxdocs, 0, $gli); 600 } 601 } 602 670 } 671 } 672 } 673 674 # now, whichever mode we are in, we can process the entire import folder 675 if ((defined $jobs) && ($jobs > 1)) 676 { 677 # if jobs are set to >1, run in parallel using MPI helper 678 # [hs, 1 july 2010] 679 &ParallelInexport::farm_out_processes($jobs, $epoch, $importdir, $block_hash, 680 $self->{'collection'}, $self->{'site'}); 603 681 } 604 682 else 605 683 { 606 # 607 # 1. Process delete files first 608 # 609 610 my @deleted_files = keys %{$manifest_lookup->{'delete'}}; 611 my @full_deleted_files = (); 612 613 # ensure all filenames are absolute 614 foreach my $df (@deleted_files) { 615 my $full_df = 616 (&util::filename_is_absolute($df)) 617 ? $df 618 : &util::filename_cat($importdir,$df); 619 620 push(@full_deleted_files,$full_df); 621 } 622 623 &plugin::remove_some($pluginfo, $collectcfg->{'infodbtype'}, $archivedir, \@full_deleted_files); 624 mark_docs_for_deletion($archive_info,{}, 625 \@full_deleted_files, 626 $archivedir, $verbosity, "delete"); 627 628 629 # 630 # 2. Now files for reindexing 631 # 632 633 my @reindex_files = keys %{$manifest_lookup->{'reindex'}}; 634 my @full_reindex_files = (); 635 636 # ensure all filenames are absolute 637 foreach my $rf (@reindex_files) { 638 my $full_rf = 639 (&util::filename_is_absolute($rf)) 640 ? $rf 641 : &util::filename_cat($importdir,$rf); 642 643 push(@full_reindex_files,$full_rf); 644 } 645 646 &plugin::remove_some($pluginfo, $collectcfg->{'infodbtype'}, $archivedir, \@full_reindex_files); 647 mark_docs_for_deletion($archive_info,{},\@full_reindex_files, $archivedir,$verbosity, "reindex"); 648 649 # And now ensure the new version of the file processed by appropriate 650 # plugin 651 foreach my $full_rf (@full_reindex_files) { 652 &plugin::read ($pluginfo, "", $full_rf, {}, {}, $processor, $maxdocs, 0, $gli); 653 } 654 655 656 # 657 # 3. Now finally any new files 658 # 659 660 foreach my $file (keys %{$manifest_lookup->{'index'}}) { 661 my $block_hash = {}; 662 my $metadata = {}; 663 &plugin::file_block_read($pluginfo, $importdir, $file, $block_hash, $metadata, $gli); 664 &plugin::read ($pluginfo, $importdir, $file, $block_hash, $metadata, $processor, $maxdocs, 0, $gli); 665 } 666 667 668 } 669 684 &plugin::read ($pluginfo, $importdir, "", $block_hash, $metadata, $processor, $maxdocs, 0, $gli); 685 } 686 687 670 688 if ($saveas eq "FedoraMETS") { 671 689 # create collection "doc obj" for Fedora that contains … … 1123 1141 } 1124 1142 1143 sub add_dir_contents_to_list { 1144 1145 my ($dirname, $list) = @_; 1146 1147 # Recur over directory contents. 1148 my (@dir, $subfile); 1149 1150 # find all the files in the directory 1151 if (!opendir (DIR, $dirname)) { 1152 print STDERR "inexport: WARNING - couldn't read directory $dirname\n"; 1153 return -1; # error in processing 1154 } 1155 @dir = readdir (DIR); 1156 closedir (DIR); 1157 1158 for (my $i = 0; $i < scalar(@dir); $i++) { 1159 my $subfile = $dir[$i]; 1160 next if ($subfile =~ m/^\.\.?$/); 1161 my $full_file = &util::filename_cat($dirname, $subfile); 1162 if (-d $full_file) { 1163 &add_dir_contents_to_list($full_file, $list); 1164 } else { 1165 push (@$list, $full_file); 1166 } 1167 } 1168 1169 } 1125 1170 1126 1171
Note:
See TracChangeset
for help on using the changeset viewer.