Changeset 18469 for gsdl


Ignore:
Timestamp:
02/06/09 18:19:44 (12 years ago)
Author:
davidb
Message:

Support for reindexing a document added

Location:
gsdl/trunk
Files:
14 edited

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/bin/script/import.pl

    r18456 r18469  
    572572    # global blocking pass may set up some metadata
    573573    &plugin::file_block_read($pluginfo, $importdir, "", $block_hash, $metadata, $gli);
    574     # Can now work out which files were new, already existed, and have
    575     # been deleted
    576 
    577     &inexport::new_vs_old_import_diff($archive_info,$block_hash,$importdir);
    578    
    579     my @deleted_files = sort keys %{$block_hash->{'deleted_files'}};
    580     if (scalar(@deleted_files>0)) {
    581         print STDERR "Delete files:\n  ";
    582         print STDERR join("\n  ",@deleted_files), "\n";
    583     }
    584 
    585     my @new_files = sort keys %{$block_hash->{'new_files'}};
    586     if (scalar(@new_files>0)) {
    587         print STDERR "New files:\n  ";
    588         print STDERR join("\n  ",@new_files), "\n";
    589     }
    590 
    591     &inexport::mark_docs_for_deletion($archive_info,\@deleted_files,$archivedir,
    592         $verbosity);
     574
     575
     576    if (!$removeold) {
     577        # Can now work out which files were new, already existed, and have
     578        # been deleted
     579       
     580        &inexport::new_vs_old_import_diff($archive_info,$block_hash,$importdir);
     581       
     582        my @deleted_files = sort keys %{$block_hash->{'deleted_files'}};
     583        if (scalar(@deleted_files>0)) {
     584        print STDERR "Files deleted since last import:\n  ";
     585        print STDERR join("\n  ",@deleted_files), "\n";
     586        }
     587       
     588        my @new_files = sort keys %{$block_hash->{'new_files'}};
     589        if (scalar(@new_files>0)) {
     590        print STDERR "New files since last import:\n  ";
     591        print STDERR join("\n  ",@new_files), "\n";
     592        }
     593       
     594        &inexport::mark_docs_for_deletion($archive_info,\@deleted_files,
     595                          $archivedir,$verbosity);
     596
     597        my @existing_files = sort keys %{$block_hash->{'existing_files'}};
     598
     599        my @reindex_files
     600        = &inexport::mark_docs_for_reindex($archive_info,\@existing_files,
     601                           $archivedir,$verbosity);
     602    }
    593603
    594604    &plugin::read ($pluginfo, $importdir, "", $block_hash, $metadata, $processor, $maxdocs, 0, $gli);
  • gsdl/trunk/common-src/src/gdbmedit/gdbmdel/gdbmdel.cpp

    r18442 r18469  
    7777  // open the database
    7878#ifdef __WIN32__
    79   dbf = gdbm_open (argv[1], block_size, GDBM_READER, 00664, NULL, 0);
     79  dbf = gdbm_open (argv[1], block_size, GDBM_WRCREAT, 00664, NULL, 1);
    8080#else
    81   dbf = gdbm_open  (argv[1], block_size, GDBM_READER, 00664, NULL);
     81  dbf = gdbm_open  (argv[1], block_size, GDBM_WRCREAT, 00664, NULL);
    8282#endif
    8383  if (dbf == NULL)
    8484    {
    85       cerr << "couldn't create " << argv[1] << endl;
     85      cerr << "Couldn't open " << argv[1] << endl;
    8686      exit (-1);
    8787    }
     
    9696  //  -1 == key does not exist or there was an error
    9797
     98  if (status < 0) {
     99    cerr << "Opened database " << argv[1] << endl;
     100    cerr << "  but couldn't delete: " << argv[2] << endl;
     101  }
     102
    98103  gdbm_close (dbf);
    99104
  • gsdl/trunk/common-src/src/gdbmedit/gdbmkeys/gdbmkeys.cpp

    r18442 r18469  
    8282#endif
    8383  if (dbf == NULL) {
    84     cerr << "couldn't open " << argv[1] << endl;
     84    cerr << "Couldn't open " << argv[1] << endl;
    8585    exit (0);
    8686  }
  • gsdl/trunk/common-src/src/gdbmedit/txt2db/txt2db.cpp

    r17783 r18469  
    7272  char *dbname;
    7373  int append = 0;
     74  int delkey = 0;
     75
    7476  if (argc == 3) {
    7577    if (strcmp (argv[1], "-append") == 0) {
     
    105107    value = "";
    106108   
    107     // look for [key]\n
     109    // Parse out 'key' from [key]\n
     110
     111    // scan for first occurrence of [
    108112    while (!cin.eof() && c != '[') cin.get(c);
     113
    109114    if (!cin.eof()) cin.get(c); // skip [
     115
     116    // now look for closing ], building up 'key' as we go
    110117    while (!cin.eof() && c != ']') {
    111118      key.push_back ((unsigned char)c);
    112119      cin.get(c);
    113120    }
    114     if (!cin.eof()) cin.get(c); // skip ]
     121
     122    if (!cin.eof()) {
     123      // most likely an eol char, but if '-', then signifies record
     124      // is to be deleted, not added
     125      cin.get(c);
     126      if (c == '-') {   
     127    delkey = 1;
     128      }
     129      else {
     130    delkey = 0;
     131      }
     132    }
    115133    while (!cin.eof() && (c == '\n' || c == '\r')) cin.get(c);
    116134   
     
    155173      }
    156174      key_data.dsize = strlen(key_data.dptr);
     175
     176      if (delkey) {
     177    // delete the given key
     178    if (gdbm_delete(dbf, key_data) < 0) {
     179      cerr << "gdbm_delete returned an error" << endl;
     180    }
     181      }
     182      else {
     183
     184    // add/append
     185
     186    // convert value to a datum datatype
     187    datum value_data;
     188    value_data.dptr = value.getcstr();
     189    if (value_data.dptr == NULL) {
     190      cerr << "NULL value_data.dptr" << endl;
     191      exit (0);
     192    }
     193    value_data.dsize = strlen(value_data.dptr);
    157194     
    158       // convert value to a datum datatype
    159       datum value_data;
    160       value_data.dptr = value.getcstr();
    161       if (value_data.dptr == NULL) {
    162     cerr << "NULL value_data.dptr" << endl;
    163     exit (0);
    164       }
    165       value_data.dsize = strlen(value_data.dptr);
    166      
    167       // store the value
    168       if (gdbm_store (dbf, key_data, value_data, GDBM_REPLACE) < 0) {
    169     cerr << "gdbm_store returned an error" << endl;
    170     exit (0);
    171       }
    172      
     195    // store the value
     196    if (gdbm_store (dbf, key_data, value_data, GDBM_REPLACE) < 0) {
     197      cerr << "gdbm_store returned an error" << endl;
     198      exit (0);
     199    }
     200
     201   
     202    free(value_data.dptr);
     203      }
     204
    173205      free(key_data.dptr);
    174       free(value_data.dptr);
    175206    }
    176207  }
  • gsdl/trunk/perllib/GDBMUtils.pm

    r18456 r18469  
    9393
    9494    # Remove the document from the database
    95     print STDERR "#Set document\ncmd: gdbmset$exe \"$database\" \"$oid\"\n" if $debug;
    96 
    97     `gdbmdel$exe "$database" "$oid"`;
     95
     96    my $cmd = "gdbmdel$exe \"$database\" \"$oid\"";
     97    print STDERR "#Delete document\ncmd: $cmd" if $debug;
     98
     99    `$cmd`;
     100
    98101}
    99102
  • gsdl/trunk/perllib/arcinfo.pm

    r18456 r18469  
    251251    }
    252252
    253     $self->delete_info ($OID);
     253    if (defined $self->{'info'}->{$OID}) {
     254    # test to see if we are in a reindex situation
     255
     256    my $existing_status_info = $self->get_status_info($OID);
     257
     258    if ($existing_status_info eq "D") {
     259        # yes, we're in a reindexing situation
     260        $self->delete_info ($OID);
     261
     262
     263        # force setting to "reindex"
     264        $index_status = "R";
     265
     266    }
     267    else {
     268        # some other, possibly erroneous, situation has arisen
     269        # where the document already seems to exist
     270        print STDERR "Warning: $OID already exists with index status $existing_status_info\n";
     271        print STDERR "         Deleting previous version\n";
     272
     273        $self->delete_info ($OID);
     274    }
     275    }
     276
    254277    $self->{'info'}->{$OID} = [$doc_file,$index_status];
    255278    push (@{$self->{'order'}}, [$OID, $sortmeta]);
     279
     280
    256281}
    257282
  • gsdl/trunk/perllib/basebuilder.pm

    r18441 r18469  
    424424
    425425    # Output classifier reverse lookup, used in incremental deletion
    426     #&classify::print_reverse_lookup($infodb_handle);
     426    ####&classify::print_reverse_lookup($infodb_handle);
    427427
    428428    # output doclist
  • gsdl/trunk/perllib/basebuildproc.pm

    r18463 r18469  
    343343{
    344344    my $self = shift (@_);
    345     my ($field) = @_;
     345    my ($field,$edit_mode) = @_;
    346346
    347347    # Keep some statistics relating to metadata sets used and
     
    355355    my $core_field = $2;
    356356
    357     $self->{'doc_mdprefix_fields'}->{$prefix}->{$core_field}++;
    358     $self->{'mdprefix_fields'}->{$prefix}->{$core_field}++;
     357    if (($edit_mode eq "add") || ($edit_mode eq "reindex")) {
     358        $self->{'doc_mdprefix_fields'}->{$prefix}->{$core_field}++;
     359        $self->{'mdprefix_fields'}->{$prefix}->{$core_field}++;
     360    }
     361    else {
     362        # delete
     363        $self->{'doc_mdprefix_fields'}->{$prefix}->{$core_field}--;
     364        $self->{'mdprefix_fields'}->{$prefix}->{$core_field}--;
     365    }
     366
    359367    }
    360368    elsif ($field =~ m/^[[:upper:]]/) {
    361369    # implicit 'ex' metadata set
    362370
    363     $self->{'doc_mdprefix_fields'}->{'ex'}->{$field}++;
    364     $self->{'mdprefix_fields'}->{'ex'}->{$field}++;
     371    if (($edit_mode eq "add") || ($edit_mode eq "reindex")) {
     372
     373        $self->{'doc_mdprefix_fields'}->{'ex'}->{$field}++;
     374        $self->{'mdprefix_fields'}->{'ex'}->{$field}++;
     375    }
     376    else {
     377        # delete
     378        $self->{'doc_mdprefix_fields'}->{'ex'}->{$field}--;
     379        $self->{'mdprefix_fields'}->{'ex'}->{$field}--;
     380    }
    365381    }
    366382
     
    425441    # delete
    426442    $self->{'num_docs'} -= 1 unless ($doctype eq "classification");
    427     return;
    428443    }
    429444
     
    448463
    449464    # update a few statistics
    450     $self->{'num_bytes'} += $doc_obj->get_text_length ($section);
    451     $self->{'num_sections'} += 1 unless ($doctype eq "classification");
     465    if (($edit_mode eq "add") || ($edit_mode eq "reindex")) {
     466
     467        $self->{'num_bytes'} += $doc_obj->get_text_length ($section);
     468        $self->{'num_sections'} += 1 unless ($doctype eq "classification");
     469    }
     470    else {
     471        # delete
     472        $self->{'num_bytes'} -= $doc_obj->get_text_length ($section);
     473        $self->{'num_sections'} -= 1 unless ($doctype eq "classification");
     474    }
    452475
    453476    # output the fact that this document is a document (unless doctype
     
    488511        # special case for URL metadata
    489512        if ($field =~ /^URL$/i) {
    490             &dbutil::write_infodb_entry($self->{'infodbtype'}, $infodb_handle, $value, { 'section' => [ $section_OID ] });
     513            if (($edit_mode eq "add") || ($edit_mode eq "reindex")) {
     514
     515            &dbutil::write_infodb_entry($self->{'infodbtype'}, $infodb_handle, $value, { 'section' => [ $section_OID ] });
     516            }
     517            else {
     518            # delete
     519            &dbutil::delete_infodb_entry($self->{'infodbtype'}, $infodb_handle, $value);
     520            }
     521
     522           
    491523        }
    492524
     
    496528            if ($section eq "" && $self->{'store_metadata_coverage'} =~ /^true$/i)
    497529            {
    498             $self->infodb_metadata_stats($field);
     530            $self->infodb_metadata_stats($field,$edit_mode);
    499531            }
    500532        }
     
    565597    }
    566598   
    567     &dbutil::write_infodb_entry($self->{'infodbtype'}, $infodb_handle, $section_OID, \%section_infodb);
     599    if (($edit_mode eq "add") || ($edit_mode eq "reindex")) {
     600
     601        &dbutil::write_infodb_entry($self->{'infodbtype'}, $infodb_handle, $section_OID, \%section_infodb);
     602    }
     603    else {
     604        # delete
     605        &dbutil::delete_infodb_entry($self->{'infodbtype'}, $infodb_handle, $section_OID);
     606    }
     607
    568608   
    569609    # output a database entry for the document number, except for Lucene (which no longer needs this information)
    570610    unless (ref($self) eq "lucenebuildproc")
    571611    {
    572         if ($self->{'db_level'} eq "document") {
    573         &dbutil::write_infodb_entry($self->{'infodbtype'}, $infodb_handle, $self->{'num_docs'}, { 'section' => [ $doc_OID ] });
     612        if (($edit_mode eq "add") || ($edit_mode eq "reindex")) {
     613       
     614        if ($self->{'db_level'} eq "document") {
     615            &dbutil::write_infodb_entry($self->{'infodbtype'}, $infodb_handle, $self->{'num_docs'}, { 'section' => [ $doc_OID ] });
     616        }
     617        else {
     618            &dbutil::write_infodb_entry($self->{'infodbtype'}, $infodb_handle, $self->{'num_sections'}, { 'section' => [ $section_OID ] });
     619        }
    574620        }
    575621        else {
    576         &dbutil::write_infodb_entry($self->{'infodbtype'}, $infodb_handle, $self->{'num_sections'}, { 'section' => [ $section_OID ] });
     622
     623        if ($self->{'db_level'} eq "document") {
     624            &dbutil::delete_infodb_entry($self->{'infodbtype'}, $infodb_handle, $self->{'num_docs'});
     625        }
     626        else {
     627            &dbutil::delete_infodb_entry($self->{'infodbtype'}, $infodb_handle, $self->{'num_sections'});
     628        }
     629
    577630        }
    578631    }
  • gsdl/trunk/perllib/classify/AZList.pm

    r18455 r18469  
    124124    my $outhandle = $self->{'outhandle'};
    125125
    126     if ($edit_mode eq "delete") {
     126    if (($edit_mode eq "delete") || ($edit_mode eq "reindex")) {
    127127    $self->oid_hash_delete($doc_OID,'list');
    128     return;
     128    return if ($edit_mode eq "delete");     
    129129    }
    130130
  • gsdl/trunk/perllib/inexport.pm

    r18457 r18469  
    2727
    2828use strict;
     29
     30use File::Basename;
    2931
    3032use util;
     
    5355    }
    5456
     57
    5558    # Figure out which are the new files, existing files and so
    5659    # by implication the files from the previous import that are not
     
    6871    }
    6972
     73    if (defined $block_hash->{'file_blocks'}->{$full_curr_file}) {
     74        # If in block list, we want to ignore it
     75        delete $block_hash->{'all_files'}->{$curr_file};
     76
     77        if (defined $full_prev_all_files->{$full_curr_file}) {
     78        # also make sure it is gone from 'previous' list so
     79        # not mistaken for a file that needs to be deleted
     80        delete $full_prev_all_files->{$full_curr_file};
     81        }
     82        next;
     83    }
     84
    7085    # figure out if new file or not
    7186    if (defined $full_prev_all_files->{$full_curr_file}) {
     87       
    7288        # had it before
    73         $block_hash->{'existing_files'}->{$curr_file} = 1;
     89        $block_hash->{'existing_files'}->{$full_curr_file} = 1;
     90
    7491        # Now remove it, so by end of loop only the files
    7592        # that need deleting are left
     
    7895        }
    7996    else {
    80         $block_hash->{'new_files'}->{$curr_file} = 1;
     97        $block_hash->{'new_files'}->{$full_curr_file} = 1;
    8198    }
    8299   
     
    84101    }
    85102
    86     # By this point full_prev_all_files contains only the files
    87     # that are not in the current import folder => i.e. files
    88     # to be deleted
     103    # By this point full_prev_all_files contains the files
     104    # mentioned in archiveinf-src.db but are not in the 'import'
     105    # folder (or whatever was specified through -importdir ...)
     106
     107    # This list can contain files that were created in the 'tmp' or
     108    # 'cache' areas (such as screen-size and thumbnail images).
    89109    #
    90     # The value in each key is its "local" import file name, which is what
    91     # we want to use
     110    # In building the final list of files to delete, we test to see if
     111    # it exists on the filesystem and if it does (unusual for a file
     112    # that's allegedly deleted!), suppress it from going into the final
     113    # list
     114
     115    my $collectdir = $ENV{'GSDLCOLLECTDIR'};
     116
    92117    my @deleted_files = values %$full_prev_all_files;
    93     map { $block_hash->{'deleted_files'}->{$_} = 1 } @deleted_files;
     118    map { my $curr_file = $_;
     119      my $full_curr_file = $curr_file;
     120
     121      if (!&util::filename_is_absolute($curr_file)) {
     122          # add in import dir to make absolute
     123
     124          $full_curr_file = &util::filename_cat($collectdir,$curr_file);
     125      }
     126
     127
     128      if (!-e $full_curr_file) {
     129          $block_hash->{'deleted_files'}->{$curr_file} = 1;
     130      }
     131      } @deleted_files;
    94132}
    95133
     
    110148    # this file is used in (note in most cases, it's just one OID)
    111149   
    112     # An improvement would be to have the record read
    113     # into a hash array
    114150    my $src_rec = GDBMUtils::gdbmRecordToHash($arcinfo_src_filename,$file);
    115151    my $oids = $src_rec->{'oid'};
    116152    foreach my $oid (@$oids) {
    117153
    118         # find out if it's an assoc file or main doc
     154        # Find out if it's an assoc file or main doc
    119155
    120156        my $doc_rec = GDBMUtils::gdbmRecordToHash($arcinfo_doc_filename,$oid);
    121 ##      print STDERR "file = $file\n";
    122 
    123157        if ($doc_rec->{'src-file'}->[0] eq $file) {
    124         # mark it for deletion
     158        # It's the main doc
     159        # => mark it for deletion
     160   
    125161        if ($verbosity>1) {
    126             print STDERR "$oid marked to be deleted\n";
     162            print STDERR "$oid marked to be deleted from index on next buildcol.pl\n";
    127163        }
    128164        $archive_info->set_status_info($oid,"D");
    129165
    130166        my $val = &GDBMUtils::gdbmDatabaseGet($arcinfo_doc_filename,$oid);
    131         $val =~ s/^<index-status>(.*)$/<index-status>D/m;
    132         &GDBMUtils::gdbmDatabaseSet($arcinfo_doc_filename,$oid,$val);
     167        my ($index_status) = ($val =~ m/^<index-status>(.*)$/m);
     168        if ($index_status ne "D") {
     169            $val =~ s/^<index-status>(.*)$/<index-status>D/m;
     170            &GDBMUtils::gdbmDatabaseSet($arcinfo_doc_filename,$oid,$val);
     171            my $doc_file = $doc_rec->{'doc-file'}->[0];
     172
     173            my $doc_filename = &util::filename_cat($archivedir,$doc_file);
     174
     175
     176            my ($doc_tailname, $doc_dirname, $suffix)
     177            = File::Basename::fileparse($doc_filename, "\\.[^\\.]+\$");
     178
     179            print STDERR "Removing $doc_dirname\n" if ($verbosity>2);
     180
     181            &util::rm_r($doc_dirname);
     182           
     183        }
    133184        }
    134185        else {
     
    149200        }
    150201        }
     202
     203        GDBMUtils::gdbmDatabaseRemove($arcinfo_src_filename,$file);
    151204    }
    152205    }
     
    155208
    156209
     210sub mark_docs_for_reindex
     211{
     212    my ($archive_info,$existing_files_ref,$archivedir,$verbosity) = @_;
     213
     214    # Reindexing is accomplished by deleting the previously indexed
     215    # version of the document, and then allowing the new version to
     216    # be indexed (as would a new document be indexed).
     217    #
     218    # The first step (marking for deletion) is implemented by this routine.
     219    #
     220    # By default in Greenstone a new version of a document will hash to
     221    # a new unique OID, and the above strategy of reindex=delete+add
     222    # works fine.  A special case arises when a persistent OID is
     223    # allocated to a document (for instance through a metadata field),
     224    # and the second step to reindexing (see XXXX) detects this and
     225    # deals with it appropriately.
     226
     227    my $db_ext = &util::is_little_endian() ? ".ldb" : ".bdb";
     228    my $doc_db = "archiveinf-doc$db_ext";
     229    my $arcinfo_doc_filename = &util::filename_cat ($archivedir, $doc_db);
     230
     231
     232    my $archiveinf_timestamp = -M $arcinfo_doc_filename;
     233
     234    my $reindex_files_ref = [];
     235
     236    foreach my $existing_filename (@$existing_files_ref) {
     237   
     238    if (-M $existing_filename < $archiveinf_timestamp) {
     239        # file is newer than last build
     240       
     241        my $existing_file = $existing_filename;
     242        my $collectdir = &util::filename_cat($ENV{'GSDLCOLLECTDIR'});
     243
     244        $existing_file =~ s/^$collectdir(\\|\/)?//;
     245       
     246        print STDERR "**** Deleting existing file: $existing_file\n";
     247
     248        push(@$reindex_files_ref,$existing_file);
     249    }
     250
     251    }
     252   
     253    mark_docs_for_deletion($archive_info,$reindex_files_ref,$archivedir,$verbosity);
     254
     255    return @$reindex_files_ref;
     256}
     257
     258
     259
    1572601;
  • gsdl/trunk/perllib/plugins/ArchivesInfPlugin.pm

    r18456 r18469  
    9090
    9191    if (defined $archive_info) {
    92     print STDERR "********* have parsed and processed an archive info file\n";
    93 
    9492    my $archive_info_filename = $self->{'archive_info_filename'};
    9593
     
    10098
    10199        my $index_status = $archive_info->get_status_info($doc_oid);
    102         print STDERR "*** Updating $doc_oid $index_status\n";
     100####        print STDERR "*** Away to Update $doc_oid $index_status\n";
    103101
    104102        if ($index_status eq "D") {
    105103        # delete
    106104        $archive_info->delete_info($doc_oid);
     105        &GDBMUtils::gdbmDatabaseRemove($archive_info_filename,$doc_oid);
    107106        }
    108107        elsif ($index_status =~ m/^(I|R)$/) {
     
    215214        my $process_file = 1;
    216215
    217         # ...unless the build processor is incremental capable and -incremental was specified
     216        # ...unless the build processor is incremental capable and -incremental was specified, in which case we need to check its index_status flag
    218217        if ($processor->is_incremental_capable() && $self->{'incremental'})
    219218        {
     
    230229        }
    231230        elsif ($index_status eq "R") {
    232             # Need to be delete it from the index.
     231            # Need to be reindexed/replaced
    233232            $new_mode = $curr_mode."reindex";
     233
    234234            $process_file = 1;
    235235        }
     
    237237        # ... or we're being asked to delete it (in which case skip it)
    238238        elsif ($index_status eq "D") {
    239         # Delete it somehow from archives dir!!
    240         # => get short name, lop off filename, concat archivedir
    241         # move to recyle bin
     239        # Non-incremental Delete
     240        # It's already been deleted from the archives directory
     241        # (done during import.pl)
     242        # => All we need to do here is not process it
    242243
    243244        $process_file = 0;
    244245        }
     246
     247        if (!$processor->is_incremental_capable() && $self->{'incremental'}) {
     248        # Nag feature
     249        if (!defined $self->{'incremental-warning'}) {
     250            print $outhandle "\n";
     251            print $outhandle "Warning: command-line option '-incremental' used with *non-incremental*\n";
     252            print $outhandle "         processor '", ref $processor, "'. Some conflicts may arise.\n";
     253            print $outhandle "\n";
     254            sleep 10;
     255            $self->{'incremental-warning'} = 1;
     256        }
     257        }
     258
     259
    245260
    246261        if ($process_file) {
  • gsdl/trunk/perllib/plugins/BasePlugin.pm

    r18441 r18469  
    833833
    834834    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file);
     835
    835836    # create a new document
    836837    my $doc_obj = new doc ($filename_full_path, "indexed_doc", $self->{'file_rename_method'});
     
    840841    $doc_obj->add_utf8_metadata($top_section, "FileSize", (-s $filename_full_path));
    841842 
     843
    842844    # sets the UTF8 filename (Source) for display and sets the url ref to URL encoded version
    843845    # of the UTF8 filename (SourceFile) for generated files
    844846    $self->set_Source_metadata($doc_obj, $filename_no_path);
     847
    845848
    846849    # plugin specific stuff - what args do we need here??
     
    946949    # delete any temp files that we may have created
    947950    $self->clean_up_after_doc_obj_processing();
     951
    948952
    949953    # if process_status == 1, then the file has been processed.
  • gsdl/trunk/perllib/plugins/DirectoryPlugin.pm

    r18441 r18469  
    575575
    576576    if (defined $self->{'inf_timestamp'}) {
    577         my $inf_timestamp = $self->{'inf_timestamp'};
    578 
    579         if (! -d $full_filename) {
    580         my $filename_timestamp = -M $full_filename;
    581         if ($filename_timestamp > $inf_timestamp) {
    582             # filename has been around for longer than inf
    583             print $outhandle "**** Skipping $subfile\n" if ($verbosity >3);
    584             next;
     577        # Look to see if it's a completely new file
     578
     579        if (!$block_hash->{'new_files'}->{$full_filename}) {
     580        # Not a new file, must be an existing file
     581        # Let's see if it's newer than the last import.pl
     582
     583
     584        my $inf_timestamp = $self->{'inf_timestamp'};
     585
     586        if (! -d $full_filename) {
     587            my $filename_timestamp = -M $full_filename;
     588            if ($filename_timestamp > $inf_timestamp) {
     589            # filename has been around for longer than inf
     590            print $outhandle "**** Skipping $subfile\n" if ($verbosity >3);
     591            next;
     592            }
     593            else {
     594            # Remove old folder in archives (might hash to something different)
     595            # *** should be doing this on a Del one as well
     596            # but leave folder name?? and ensure it hashes to
     597            # same again??
     598
     599            # Then let through as new doc??
     600
     601            # mark to doc-oids that rely on it for re-indexing
     602            ## &GDBMUtils::gdbmDatabase();
     603
     604            }
    585605        }
    586606        }
  • gsdl/trunk/perllib/util.pm

    r18463 r18469  
    3535sub rm {
    3636    my (@files) = @_;
     37
    3738    my @filefiles = ();
    3839
Note: See TracChangeset for help on using the changeset viewer.