Changeset 20685


Ignore:
Timestamp:
2009-09-23T13:47:49+12:00 (15 years ago)
Author:
kjdon
Message:

Removed the classify lines from @INC - we don't do classifying during import. While processing the import directory, do the prime_doc_oid_count and new_vs_old_import_diff when incremental_mode is onlyadd, as well as for incremental. Otherwise, if we do -keepold, we get no docs imported.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/bin/script/import.pl

    r20571 r20685  
    3939    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/plugins");
    4040    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/plugouts");
    41     unshift (@INC, "$ENV{'GSDLHOME'}/perllib/classify");
    4241
    4342    if (defined $ENV{'GSDLEXTS'}) {
     
    5049        unshift (@INC, "$ext_prefix/perllib/plugins");
    5150        unshift (@INC, "$ext_prefix/perllib/plugouts");
    52         unshift (@INC, "$ext_prefix/perllib/classify");
    5351    }
    5452    }
     
    198196    'desc' => "{import.OIDmetadata}",
    199197    'type' => "string",
    200     # 'type' => "metadata", #doesn't work properly in GLI
     198     #'type' => "metadata", #doesn't work properly in GLI
    201199    # parsearg left "" as default
    202200    #'deft' => "dc.Identifier",
     
    589587
    590588
    591     if ($incremental) {
    592         # equivalent to saying ($keepold && ($incremental_mode eq "all"))
     589    if ($incremental || $incremental_mode eq "onlyadd") {
    593590
    594591        &inexport::prime_doc_oid_count($archivedir);
     
    601598                          $archivedir,$verbosity,$incremental_mode);
    602599       
    603         my @deleted_files = sort keys %{$block_hash->{'deleted_files'}};
    604         # Filter out any in gsdl/tmp area
    605         my @filtered_deleted_files = ();
    606         my $gsdl_tmp_area = &util::filename_cat($ENV{'GSDLHOME'}, "tmp");
    607         my $collect_tmp_area = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "tmp");
    608         $gsdl_tmp_area = &util::filename_to_regex($gsdl_tmp_area);
    609         $collect_tmp_area = &util::filename_to_regex($collect_tmp_area);
    610 
    611 
    612         foreach my $df (@deleted_files) {
    613         next if ($df =~ m/^$gsdl_tmp_area/);
    614         next if ($df =~ m/^$collect_tmp_area/);
    615        
    616         push(@filtered_deleted_files,$df);
    617         }
    618 
    619        
    620         @deleted_files = @filtered_deleted_files;
    621 
    622         if (scalar(@deleted_files>0)) {
    623         print STDERR "Files deleted since last import:\n  ";
    624         print STDERR join("\n  ",@deleted_files), "\n";
    625         }
    626        
    627600        my @new_files = sort keys %{$block_hash->{'new_files'}};
    628601        if (scalar(@new_files>0)) {
     
    630603        print STDERR join("\n  ",@new_files), "\n";
    631604        }
    632        
    633         &inexport::mark_docs_for_deletion($archive_info,$block_hash,\@deleted_files,
    634                           $archivedir,$verbosity);
    635 
    636         &inexport::mark_docs_for_reindex($archive_info,$block_hash,
    637                          $archivedir,$verbosity);
    638 
    639         my @reindex_files = sort keys %{$block_hash->{'reindex_files'}};
    640 
    641         if (scalar(@reindex_files>0)) {
    642         print STDERR "Files to reindex since last import:\n  ";
    643         print STDERR join("\n  ",@reindex_files), "\n";
    644         }
    645 
    646 
    647         # not sure if the following will work -- will the metadata data-structure be correctly initialized
    648         # in the right order?
     605
     606        if ($incremental) {
     607               # only look for deletions if we are truely incremental
     608        my @deleted_files = sort keys %{$block_hash->{'deleted_files'}};
     609        # Filter out any in gsdl/tmp area
     610        my @filtered_deleted_files = ();
     611        my $gsdl_tmp_area = &util::filename_cat($ENV{'GSDLHOME'}, "tmp");
     612        my $collect_tmp_area = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "tmp");
     613        $gsdl_tmp_area = &util::filename_to_regex($gsdl_tmp_area);
     614        $collect_tmp_area = &util::filename_to_regex($collect_tmp_area);
     615       
     616       
     617        foreach my $df (@deleted_files) {
     618            next if ($df =~ m/^$gsdl_tmp_area/);
     619            next if ($df =~ m/^$collect_tmp_area/);
     620           
     621            push(@filtered_deleted_files,$df);
     622        }
     623       
     624       
     625        @deleted_files = @filtered_deleted_files;
     626       
     627        if (scalar(@deleted_files>0)) {
     628            print STDERR "Files deleted since last import:\n  ";
     629            print STDERR join("\n  ",@deleted_files), "\n";
     630        }
     631       
     632       
     633        &inexport::mark_docs_for_deletion($archive_info,$block_hash,\@deleted_files,
     634                          $archivedir,$verbosity);
     635       
     636        &inexport::mark_docs_for_reindex($archive_info,$block_hash,
     637                         $archivedir,$verbosity);
     638       
     639        my @reindex_files = sort keys %{$block_hash->{'reindex_files'}};
     640       
     641        if (scalar(@reindex_files>0)) {
     642            print STDERR "Files to reindex since last import:\n  ";
     643            print STDERR join("\n  ",@reindex_files), "\n";
     644        }
     645       
     646       
     647        # not sure if the following will work -- will the metadata data-structure be correctly initialized
     648        # in the right order?
    649649#       foreach my $file (@new_files, @reindex_files) {
    650650#       &plugin::read ($pluginfo, $importdir, $file, $block_hash, $metadata, $processor, $maxdocs, 0, $gli);
    651651#       }
    652 
    653 
     652       
     653        }
     654       
    654655        # Play it safe, and run through the entire folder, only processing new or edited files
    655656        &plugin::read ($pluginfo, $importdir, "", $block_hash, $metadata, $processor, $maxdocs, 0, $gli);
Note: See TracChangeset for help on using the changeset viewer.