Changeset 18469

Show
Ignore:
Timestamp:
06.02.2009 18:19:44 (11 years ago)
Author:
davidb
Message:

Support for reindexing a document added

Location:
gsdl/trunk
Files:
14 modified

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/bin/script/import.pl

    r18456 r18469  
    572572    # global blocking pass may set up some metadata 
    573573    &plugin::file_block_read($pluginfo, $importdir, "", $block_hash, $metadata, $gli); 
    574     # Can now work out which files were new, already existed, and have 
    575     # been deleted 
    576  
    577     &inexport::new_vs_old_import_diff($archive_info,$block_hash,$importdir); 
    578      
    579     my @deleted_files = sort keys %{$block_hash->{'deleted_files'}}; 
    580     if (scalar(@deleted_files>0)) { 
    581         print STDERR "Delete files:\n  "; 
    582         print STDERR join("\n  ",@deleted_files), "\n"; 
    583     } 
    584  
    585     my @new_files = sort keys %{$block_hash->{'new_files'}}; 
    586     if (scalar(@new_files>0)) { 
    587         print STDERR "New files:\n  "; 
    588         print STDERR join("\n  ",@new_files), "\n"; 
    589     } 
    590  
    591     &inexport::mark_docs_for_deletion($archive_info,\@deleted_files,$archivedir, 
    592         $verbosity); 
     574 
     575 
     576    if (!$removeold) { 
     577        # Can now work out which files were new, already existed, and have 
     578        # been deleted 
     579         
     580        &inexport::new_vs_old_import_diff($archive_info,$block_hash,$importdir); 
     581         
     582        my @deleted_files = sort keys %{$block_hash->{'deleted_files'}}; 
     583        if (scalar(@deleted_files>0)) { 
     584        print STDERR "Files deleted since last import:\n  "; 
     585        print STDERR join("\n  ",@deleted_files), "\n"; 
     586        } 
     587         
     588        my @new_files = sort keys %{$block_hash->{'new_files'}}; 
     589        if (scalar(@new_files>0)) { 
     590        print STDERR "New files since last import:\n  "; 
     591        print STDERR join("\n  ",@new_files), "\n"; 
     592        } 
     593         
     594        &inexport::mark_docs_for_deletion($archive_info,\@deleted_files, 
     595                          $archivedir,$verbosity); 
     596 
     597        my @existing_files = sort keys %{$block_hash->{'existing_files'}}; 
     598 
     599        my @reindex_files 
     600        = &inexport::mark_docs_for_reindex($archive_info,\@existing_files, 
     601                           $archivedir,$verbosity); 
     602    } 
    593603 
    594604    &plugin::read ($pluginfo, $importdir, "", $block_hash, $metadata, $processor, $maxdocs, 0, $gli); 
  • gsdl/trunk/common-src/src/gdbmedit/gdbmdel/gdbmdel.cpp

    r18442 r18469  
    7777  // open the database 
    7878#ifdef __WIN32__ 
    79   dbf = gdbm_open (argv[1], block_size, GDBM_READER, 00664, NULL, 0); 
     79  dbf = gdbm_open (argv[1], block_size, GDBM_WRCREAT, 00664, NULL, 1); 
    8080#else 
    81   dbf = gdbm_open  (argv[1], block_size, GDBM_READER, 00664, NULL); 
     81  dbf = gdbm_open  (argv[1], block_size, GDBM_WRCREAT, 00664, NULL); 
    8282#endif 
    8383  if (dbf == NULL)  
    8484    { 
    85       cerr << "couldn't create " << argv[1] << endl; 
     85      cerr << "Couldn't open " << argv[1] << endl; 
    8686      exit (-1); 
    8787    } 
     
    9696  //  -1 == key does not exist or there was an error 
    9797 
     98  if (status < 0) { 
     99    cerr << "Opened database " << argv[1] << endl; 
     100    cerr << "  but couldn't delete: " << argv[2] << endl; 
     101  } 
     102 
    98103  gdbm_close (dbf); 
    99104 
  • gsdl/trunk/common-src/src/gdbmedit/gdbmkeys/gdbmkeys.cpp

    r18442 r18469  
    8282#endif 
    8383  if (dbf == NULL) { 
    84     cerr << "couldn't open " << argv[1] << endl; 
     84    cerr << "Couldn't open " << argv[1] << endl; 
    8585    exit (0); 
    8686  } 
  • gsdl/trunk/common-src/src/gdbmedit/txt2db/txt2db.cpp

    r17783 r18469  
    7272  char *dbname; 
    7373  int append = 0; 
     74  int delkey = 0; 
     75 
    7476  if (argc == 3) { 
    7577    if (strcmp (argv[1], "-append") == 0) { 
     
    105107    value = ""; 
    106108     
    107     // look for [key]\n 
     109    // Parse out 'key' from [key]\n 
     110 
     111    // scan for first occurrence of [ 
    108112    while (!cin.eof() && c != '[') cin.get(c); 
     113 
    109114    if (!cin.eof()) cin.get(c); // skip [ 
     115 
     116    // now look for closing ], building up 'key' as we go 
    110117    while (!cin.eof() && c != ']') { 
    111118      key.push_back ((unsigned char)c); 
    112119      cin.get(c); 
    113120    } 
    114     if (!cin.eof()) cin.get(c); // skip ] 
     121 
     122    if (!cin.eof()) { 
     123      // most likely an eol char, but if '-', then signifies record 
     124      // is to be deleted, not added 
     125      cin.get(c);  
     126      if (c == '-') {    
     127    delkey = 1; 
     128      } 
     129      else { 
     130    delkey = 0; 
     131      } 
     132    } 
    115133    while (!cin.eof() && (c == '\n' || c == '\r')) cin.get(c); 
    116134     
     
    155173      } 
    156174      key_data.dsize = strlen(key_data.dptr); 
     175 
     176      if (delkey) { 
     177    // delete the given key 
     178    if (gdbm_delete(dbf, key_data) < 0) { 
     179      cerr << "gdbm_delete returned an error" << endl; 
     180    } 
     181      } 
     182      else { 
     183 
     184    // add/append 
     185 
     186    // convert value to a datum datatype 
     187    datum value_data; 
     188    value_data.dptr = value.getcstr(); 
     189    if (value_data.dptr == NULL) { 
     190      cerr << "NULL value_data.dptr" << endl; 
     191      exit (0); 
     192    } 
     193    value_data.dsize = strlen(value_data.dptr); 
    157194       
    158       // convert value to a datum datatype 
    159       datum value_data; 
    160       value_data.dptr = value.getcstr(); 
    161       if (value_data.dptr == NULL) { 
    162     cerr << "NULL value_data.dptr" << endl; 
    163     exit (0); 
    164       } 
    165       value_data.dsize = strlen(value_data.dptr); 
    166        
    167       // store the value 
    168       if (gdbm_store (dbf, key_data, value_data, GDBM_REPLACE) < 0) { 
    169     cerr << "gdbm_store returned an error" << endl; 
    170     exit (0); 
    171       } 
    172        
     195    // store the value 
     196    if (gdbm_store (dbf, key_data, value_data, GDBM_REPLACE) < 0) { 
     197      cerr << "gdbm_store returned an error" << endl; 
     198      exit (0); 
     199    } 
     200 
     201     
     202    free(value_data.dptr); 
     203      } 
     204 
    173205      free(key_data.dptr); 
    174       free(value_data.dptr); 
    175206    } 
    176207  } 
  • gsdl/trunk/perllib/GDBMUtils.pm

    r18456 r18469  
    9393 
    9494    # Remove the document from the database 
    95     print STDERR "#Set document\ncmd: gdbmset$exe \"$database\" \"$oid\"\n" if $debug; 
    96  
    97     `gdbmdel$exe "$database" "$oid"`; 
     95 
     96    my $cmd = "gdbmdel$exe \"$database\" \"$oid\""; 
     97    print STDERR "#Delete document\ncmd: $cmd" if $debug; 
     98 
     99    `$cmd`; 
     100 
    98101} 
    99102 
  • gsdl/trunk/perllib/arcinfo.pm

    r18456 r18469  
    251251    } 
    252252 
    253     $self->delete_info ($OID); 
     253    if (defined $self->{'info'}->{$OID}) { 
     254    # test to see if we are in a reindex situation 
     255 
     256    my $existing_status_info = $self->get_status_info($OID); 
     257 
     258    if ($existing_status_info eq "D") { 
     259        # yes, we're in a reindexing situation 
     260        $self->delete_info ($OID); 
     261 
     262 
     263        # force setting to "reindex" 
     264        $index_status = "R"; 
     265 
     266    } 
     267    else { 
     268        # some other, possibly erroneous, situation has arisen 
     269        # where the document already seems to exist 
     270        print STDERR "Warning: $OID already exists with index status $existing_status_info\n"; 
     271        print STDERR "         Deleting previous version\n"; 
     272 
     273        $self->delete_info ($OID); 
     274    } 
     275    } 
     276 
    254277    $self->{'info'}->{$OID} = [$doc_file,$index_status]; 
    255278    push (@{$self->{'order'}}, [$OID, $sortmeta]); 
     279 
     280 
    256281} 
    257282 
  • gsdl/trunk/perllib/basebuilder.pm

    r18441 r18469  
    424424 
    425425    # Output classifier reverse lookup, used in incremental deletion 
    426     #&classify::print_reverse_lookup($infodb_handle); 
     426    ####&classify::print_reverse_lookup($infodb_handle); 
    427427 
    428428    # output doclist 
  • gsdl/trunk/perllib/basebuildproc.pm

    r18463 r18469  
    343343{ 
    344344    my $self = shift (@_); 
    345     my ($field) = @_; 
     345    my ($field,$edit_mode) = @_; 
    346346 
    347347    # Keep some statistics relating to metadata sets used and 
     
    355355    my $core_field = $2; 
    356356 
    357     $self->{'doc_mdprefix_fields'}->{$prefix}->{$core_field}++; 
    358     $self->{'mdprefix_fields'}->{$prefix}->{$core_field}++; 
     357    if (($edit_mode eq "add") || ($edit_mode eq "reindex")) { 
     358        $self->{'doc_mdprefix_fields'}->{$prefix}->{$core_field}++; 
     359        $self->{'mdprefix_fields'}->{$prefix}->{$core_field}++; 
     360    } 
     361    else { 
     362        # delete 
     363        $self->{'doc_mdprefix_fields'}->{$prefix}->{$core_field}--; 
     364        $self->{'mdprefix_fields'}->{$prefix}->{$core_field}--; 
     365    } 
     366 
    359367    } 
    360368    elsif ($field =~ m/^[[:upper:]]/) { 
    361369    # implicit 'ex' metadata set 
    362370 
    363     $self->{'doc_mdprefix_fields'}->{'ex'}->{$field}++; 
    364     $self->{'mdprefix_fields'}->{'ex'}->{$field}++; 
     371    if (($edit_mode eq "add") || ($edit_mode eq "reindex")) { 
     372 
     373        $self->{'doc_mdprefix_fields'}->{'ex'}->{$field}++; 
     374        $self->{'mdprefix_fields'}->{'ex'}->{$field}++; 
     375    } 
     376    else { 
     377        # delete 
     378        $self->{'doc_mdprefix_fields'}->{'ex'}->{$field}--; 
     379        $self->{'mdprefix_fields'}->{'ex'}->{$field}--; 
     380    } 
    365381    } 
    366382 
     
    425441    # delete 
    426442    $self->{'num_docs'} -= 1 unless ($doctype eq "classification"); 
    427     return; 
    428443    } 
    429444 
     
    448463 
    449464    # update a few statistics 
    450     $self->{'num_bytes'} += $doc_obj->get_text_length ($section); 
    451     $self->{'num_sections'} += 1 unless ($doctype eq "classification"); 
     465    if (($edit_mode eq "add") || ($edit_mode eq "reindex")) { 
     466 
     467        $self->{'num_bytes'} += $doc_obj->get_text_length ($section); 
     468        $self->{'num_sections'} += 1 unless ($doctype eq "classification"); 
     469    } 
     470    else { 
     471        # delete 
     472        $self->{'num_bytes'} -= $doc_obj->get_text_length ($section); 
     473        $self->{'num_sections'} -= 1 unless ($doctype eq "classification"); 
     474    } 
    452475 
    453476    # output the fact that this document is a document (unless doctype 
     
    488511        # special case for URL metadata 
    489512        if ($field =~ /^URL$/i) { 
    490             &dbutil::write_infodb_entry($self->{'infodbtype'}, $infodb_handle, $value, { 'section' => [ $section_OID ] }); 
     513            if (($edit_mode eq "add") || ($edit_mode eq "reindex")) { 
     514 
     515            &dbutil::write_infodb_entry($self->{'infodbtype'}, $infodb_handle, $value, { 'section' => [ $section_OID ] }); 
     516            } 
     517            else { 
     518            # delete 
     519            &dbutil::delete_infodb_entry($self->{'infodbtype'}, $infodb_handle, $value); 
     520            } 
     521 
     522             
    491523        } 
    492524 
     
    496528            if ($section eq "" && $self->{'store_metadata_coverage'} =~ /^true$/i) 
    497529            { 
    498             $self->infodb_metadata_stats($field); 
     530            $self->infodb_metadata_stats($field,$edit_mode); 
    499531            } 
    500532        } 
     
    565597    }  
    566598     
    567     &dbutil::write_infodb_entry($self->{'infodbtype'}, $infodb_handle, $section_OID, \%section_infodb); 
     599    if (($edit_mode eq "add") || ($edit_mode eq "reindex")) { 
     600 
     601        &dbutil::write_infodb_entry($self->{'infodbtype'}, $infodb_handle, $section_OID, \%section_infodb); 
     602    } 
     603    else { 
     604        # delete 
     605        &dbutil::delete_infodb_entry($self->{'infodbtype'}, $infodb_handle, $section_OID); 
     606    } 
     607 
    568608     
    569609    # output a database entry for the document number, except for Lucene (which no longer needs this information) 
    570610    unless (ref($self) eq "lucenebuildproc") 
    571611    { 
    572         if ($self->{'db_level'} eq "document") { 
    573         &dbutil::write_infodb_entry($self->{'infodbtype'}, $infodb_handle, $self->{'num_docs'}, { 'section' => [ $doc_OID ] }); 
     612        if (($edit_mode eq "add") || ($edit_mode eq "reindex")) { 
     613         
     614        if ($self->{'db_level'} eq "document") { 
     615            &dbutil::write_infodb_entry($self->{'infodbtype'}, $infodb_handle, $self->{'num_docs'}, { 'section' => [ $doc_OID ] }); 
     616        } 
     617        else { 
     618            &dbutil::write_infodb_entry($self->{'infodbtype'}, $infodb_handle, $self->{'num_sections'}, { 'section' => [ $section_OID ] }); 
     619        } 
    574620        } 
    575621        else { 
    576         &dbutil::write_infodb_entry($self->{'infodbtype'}, $infodb_handle, $self->{'num_sections'}, { 'section' => [ $section_OID ] }); 
     622 
     623        if ($self->{'db_level'} eq "document") { 
     624            &dbutil::delete_infodb_entry($self->{'infodbtype'}, $infodb_handle, $self->{'num_docs'}); 
     625        } 
     626        else { 
     627            &dbutil::delete_infodb_entry($self->{'infodbtype'}, $infodb_handle, $self->{'num_sections'}); 
     628        } 
     629 
    577630        } 
    578631    } 
  • gsdl/trunk/perllib/classify/AZList.pm

    r18455 r18469  
    124124    my $outhandle = $self->{'outhandle'}; 
    125125 
    126     if ($edit_mode eq "delete") { 
     126    if (($edit_mode eq "delete") || ($edit_mode eq "reindex")) { 
    127127    $self->oid_hash_delete($doc_OID,'list'); 
    128     return; 
     128    return if ($edit_mode eq "delete");      
    129129    } 
    130130 
  • gsdl/trunk/perllib/inexport.pm

    r18457 r18469  
    2727 
    2828use strict; 
     29 
     30use File::Basename; 
    2931 
    3032use util; 
     
    5355    } 
    5456 
     57 
    5558    # Figure out which are the new files, existing files and so 
    5659    # by implication the files from the previous import that are not 
     
    6871    } 
    6972 
     73    if (defined $block_hash->{'file_blocks'}->{$full_curr_file}) { 
     74        # If in block list, we want to ignore it 
     75        delete $block_hash->{'all_files'}->{$curr_file}; 
     76 
     77        if (defined $full_prev_all_files->{$full_curr_file}) { 
     78        # also make sure it is gone from 'previous' list so 
     79        # not mistaken for a file that needs to be deleted 
     80        delete $full_prev_all_files->{$full_curr_file}; 
     81        } 
     82        next; 
     83    } 
     84 
    7085    # figure out if new file or not 
    7186    if (defined $full_prev_all_files->{$full_curr_file}) { 
     87         
    7288        # had it before 
    73         $block_hash->{'existing_files'}->{$curr_file} = 1; 
     89        $block_hash->{'existing_files'}->{$full_curr_file} = 1; 
     90 
    7491        # Now remove it, so by end of loop only the files 
    7592        # that need deleting are left 
     
    7895        } 
    7996    else { 
    80         $block_hash->{'new_files'}->{$curr_file} = 1; 
     97        $block_hash->{'new_files'}->{$full_curr_file} = 1; 
    8198    } 
    8299     
     
    84101    } 
    85102 
    86     # By this point full_prev_all_files contains only the files 
    87     # that are not in the current import folder => i.e. files 
    88     # to be deleted 
     103    # By this point full_prev_all_files contains the files 
     104    # mentioned in archiveinf-src.db but are not in the 'import' 
     105    # folder (or whatever was specified through -importdir ...) 
     106 
     107    # This list can contain files that were created in the 'tmp' or 
     108    # 'cache' areas (such as screen-size and thumbnail images). 
    89109    # 
    90     # The value in each key is its "local" import file name, which is what  
    91     # we want to use 
     110    # In building the final list of files to delete, we test to see if 
     111    # it exists on the filesystem and if it does (unusual for a file 
     112    # that's allegedly deleted!), suppress it from going into the final 
     113    # list 
     114 
     115    my $collectdir = $ENV{'GSDLCOLLECTDIR'}; 
     116 
    92117    my @deleted_files = values %$full_prev_all_files; 
    93     map { $block_hash->{'deleted_files'}->{$_} = 1 } @deleted_files; 
     118    map { my $curr_file = $_; 
     119      my $full_curr_file = $curr_file; 
     120 
     121      if (!&util::filename_is_absolute($curr_file)) { 
     122          # add in import dir to make absolute 
     123 
     124          $full_curr_file = &util::filename_cat($collectdir,$curr_file); 
     125      } 
     126 
     127 
     128      if (!-e $full_curr_file) { 
     129          $block_hash->{'deleted_files'}->{$curr_file} = 1; 
     130      } 
     131      } @deleted_files; 
    94132} 
    95133 
     
    110148    # this file is used in (note in most cases, it's just one OID) 
    111149     
    112     # An improvement would be to have the record read 
    113     # into a hash array 
    114150    my $src_rec = GDBMUtils::gdbmRecordToHash($arcinfo_src_filename,$file); 
    115151    my $oids = $src_rec->{'oid'}; 
    116152    foreach my $oid (@$oids) { 
    117153 
    118         # find out if it's an assoc file or main doc 
     154        # Find out if it's an assoc file or main doc 
    119155 
    120156        my $doc_rec = GDBMUtils::gdbmRecordToHash($arcinfo_doc_filename,$oid); 
    121 ##      print STDERR "file = $file\n"; 
    122  
    123157        if ($doc_rec->{'src-file'}->[0] eq $file) { 
    124         # mark it for deletion 
     158        # It's the main doc 
     159        # => mark it for deletion 
     160     
    125161        if ($verbosity>1) { 
    126             print STDERR "$oid marked to be deleted\n"; 
     162            print STDERR "$oid marked to be deleted from index on next buildcol.pl\n"; 
    127163        } 
    128164        $archive_info->set_status_info($oid,"D"); 
    129165 
    130166        my $val = &GDBMUtils::gdbmDatabaseGet($arcinfo_doc_filename,$oid); 
    131         $val =~ s/^<index-status>(.*)$/<index-status>D/m; 
    132         &GDBMUtils::gdbmDatabaseSet($arcinfo_doc_filename,$oid,$val); 
     167        my ($index_status) = ($val =~ m/^<index-status>(.*)$/m); 
     168        if ($index_status ne "D") { 
     169            $val =~ s/^<index-status>(.*)$/<index-status>D/m; 
     170            &GDBMUtils::gdbmDatabaseSet($arcinfo_doc_filename,$oid,$val); 
     171            my $doc_file = $doc_rec->{'doc-file'}->[0]; 
     172 
     173            my $doc_filename = &util::filename_cat($archivedir,$doc_file); 
     174 
     175 
     176            my ($doc_tailname, $doc_dirname, $suffix)  
     177            = File::Basename::fileparse($doc_filename, "\\.[^\\.]+\$"); 
     178 
     179            print STDERR "Removing $doc_dirname\n" if ($verbosity>2); 
     180 
     181            &util::rm_r($doc_dirname); 
     182             
     183        } 
    133184        } 
    134185        else { 
     
    149200        } 
    150201        } 
     202 
     203        GDBMUtils::gdbmDatabaseRemove($arcinfo_src_filename,$file); 
    151204    } 
    152205    } 
     
    155208 
    156209 
     210sub mark_docs_for_reindex 
     211{ 
     212    my ($archive_info,$existing_files_ref,$archivedir,$verbosity) = @_; 
     213 
     214    # Reindexing is accomplished by deleting the previously indexed 
     215    # version of the document, and then allowing the new version to 
     216    # be indexed (as would a new document be indexed).  
     217    #  
     218    # The first step (marking for deletion) is implemented by this routine. 
     219    #  
     220    # By default in Greenstone a new version of a document will hash to 
     221    # a new unique OID, and the above strategy of reindex=delete+add 
     222    # works fine.  A special case arises when a persistent OID is  
     223    # allocated to a document (for instance through a metadata field), 
     224    # and the second step to reindexing (see XXXX) detects this and 
     225    # deals with it appropriately. 
     226 
     227    my $db_ext = &util::is_little_endian() ? ".ldb" : ".bdb"; 
     228    my $doc_db = "archiveinf-doc$db_ext"; 
     229    my $arcinfo_doc_filename = &util::filename_cat ($archivedir, $doc_db); 
     230 
     231 
     232    my $archiveinf_timestamp = -M $arcinfo_doc_filename; 
     233 
     234    my $reindex_files_ref = []; 
     235 
     236    foreach my $existing_filename (@$existing_files_ref) { 
     237     
     238    if (-M $existing_filename < $archiveinf_timestamp) { 
     239        # file is newer than last build 
     240         
     241        my $existing_file = $existing_filename; 
     242        my $collectdir = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}); 
     243 
     244        $existing_file =~ s/^$collectdir(\\|\/)?//; 
     245         
     246        print STDERR "**** Deleting existing file: $existing_file\n"; 
     247 
     248        push(@$reindex_files_ref,$existing_file); 
     249    } 
     250 
     251    } 
     252     
     253    mark_docs_for_deletion($archive_info,$reindex_files_ref,$archivedir,$verbosity); 
     254 
     255    return @$reindex_files_ref; 
     256} 
     257 
     258 
     259 
    1572601; 
  • gsdl/trunk/perllib/plugins/ArchivesInfPlugin.pm

    r18456 r18469  
    9090 
    9191    if (defined $archive_info) { 
    92     print STDERR "********* have parsed and processed an archive info file\n"; 
    93  
    9492    my $archive_info_filename = $self->{'archive_info_filename'}; 
    9593 
     
    10098 
    10199        my $index_status = $archive_info->get_status_info($doc_oid); 
    102         print STDERR "*** Updating $doc_oid $index_status\n"; 
     100####        print STDERR "*** Away to Update $doc_oid $index_status\n"; 
    103101 
    104102        if ($index_status eq "D") { 
    105103        # delete 
    106104        $archive_info->delete_info($doc_oid); 
     105        &GDBMUtils::gdbmDatabaseRemove($archive_info_filename,$doc_oid); 
    107106        } 
    108107        elsif ($index_status =~ m/^(I|R)$/) { 
     
    215214        my $process_file = 1; 
    216215 
    217         # ...unless the build processor is incremental capable and -incremental was specified 
     216        # ...unless the build processor is incremental capable and -incremental was specified, in which case we need to check its index_status flag 
    218217        if ($processor->is_incremental_capable() && $self->{'incremental'}) 
    219218        { 
     
    230229        } 
    231230        elsif ($index_status eq "R") { 
    232             # Need to be delete it from the index. 
     231            # Need to be reindexed/replaced 
    233232            $new_mode = $curr_mode."reindex"; 
     233 
    234234            $process_file = 1; 
    235235        } 
     
    237237        # ... or we're being asked to delete it (in which case skip it) 
    238238        elsif ($index_status eq "D") { 
    239         # Delete it somehow from archives dir!! 
    240         # => get short name, lop off filename, concat archivedir 
    241         # move to recyle bin 
     239        # Non-incremental Delete 
     240        # It's already been deleted from the archives directory 
     241        # (done during import.pl) 
     242        # => All we need to do here is not process it 
    242243 
    243244        $process_file = 0; 
    244245        } 
     246 
     247        if (!$processor->is_incremental_capable() && $self->{'incremental'}) { 
     248        # Nag feature 
     249        if (!defined $self->{'incremental-warning'}) { 
     250            print $outhandle "\n"; 
     251            print $outhandle "Warning: command-line option '-incremental' used with *non-incremental*\n"; 
     252            print $outhandle "         processor '", ref $processor, "'. Some conflicts may arise.\n"; 
     253            print $outhandle "\n"; 
     254            sleep 10; 
     255            $self->{'incremental-warning'} = 1; 
     256        } 
     257        } 
     258 
     259 
    245260 
    246261        if ($process_file) { 
  • gsdl/trunk/perllib/plugins/BasePlugin.pm

    r18441 r18469  
    833833 
    834834    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); 
     835 
    835836    # create a new document 
    836837    my $doc_obj = new doc ($filename_full_path, "indexed_doc", $self->{'file_rename_method'}); 
     
    840841    $doc_obj->add_utf8_metadata($top_section, "FileSize", (-s $filename_full_path)); 
    841842  
     843 
    842844    # sets the UTF8 filename (Source) for display and sets the url ref to URL encoded version 
    843845    # of the UTF8 filename (SourceFile) for generated files 
    844846    $self->set_Source_metadata($doc_obj, $filename_no_path); 
     847 
    845848 
    846849    # plugin specific stuff - what args do we need here?? 
     
    946949    # delete any temp files that we may have created 
    947950    $self->clean_up_after_doc_obj_processing(); 
     951 
    948952 
    949953    # if process_status == 1, then the file has been processed. 
  • gsdl/trunk/perllib/plugins/DirectoryPlugin.pm

    r18441 r18469  
    575575 
    576576    if (defined $self->{'inf_timestamp'}) { 
    577         my $inf_timestamp = $self->{'inf_timestamp'}; 
    578  
    579         if (! -d $full_filename) { 
    580         my $filename_timestamp = -M $full_filename; 
    581         if ($filename_timestamp > $inf_timestamp) { 
    582             # filename has been around for longer than inf 
    583             print $outhandle "**** Skipping $subfile\n" if ($verbosity >3); 
    584             next; 
     577        # Look to see if it's a completely new file 
     578 
     579        if (!$block_hash->{'new_files'}->{$full_filename}) { 
     580        # Not a new file, must be an existing file 
     581        # Let' see if it's newer than the last import.pl 
     582 
     583 
     584        my $inf_timestamp = $self->{'inf_timestamp'}; 
     585 
     586        if (! -d $full_filename) { 
     587            my $filename_timestamp = -M $full_filename; 
     588            if ($filename_timestamp > $inf_timestamp) { 
     589            # filename has been around for longer than inf 
     590            print $outhandle "**** Skipping $subfile\n" if ($verbosity >3); 
     591            next; 
     592            } 
     593            else { 
     594            # Remove old folder in archives (might hash to something different) 
     595            # *** should be doing this on a Del one as well 
     596            # but leave folder name?? and ensure hashs to 
     597            # same again?? 
     598 
     599            # Then let through as new doc?? 
     600 
     601            # mark to doc-oids that rely on it for re-indexing 
     602            ## &GDBMUtils::gdbmDatabase(); 
     603 
     604            } 
    585605        } 
    586606        } 
  • gsdl/trunk/perllib/util.pm

    r18463 r18469  
    3535sub rm { 
    3636    my (@files) = @_; 
     37 
    3738    my @filefiles = (); 
    3839