Changeset 19498 for gsdl/trunk


Ignore:
Timestamp:
2009-05-18T11:00:51+12:00 (15 years ago)
Author:
davidb
Message:

Supporting routines that exploit the new 'metafiles' structures, introduced to track which metadata.xml file a piece of metadata came from

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/inexport.pm

    r19266 r19498  
    125125    }
    126126
    127     # figure of if new file or not
     127    # figure out if new file or not
    128128    if (defined $full_prev_all_files->{$full_curr_file}) {
    129129       
     
    152152    # In building the final list of files to delete, we test to see if
    153153    # it exists on the filesystem and if it does (unusual for a file
    154     # that's allegedly deleted!) , supress it from going into the final
     154    # that's allegedly deleted!), supress it from going into the final
    155155    # list
    156156
     
    175175
    176176
     177sub is_assoc_file
     178{
     179    my ($file,$doc_rec) = @_;
     180
     181    foreach my $af (@{$doc_rec->{'assoc-file'}}) {
     182    return 1 if ($af eq $file);
     183    }
     184
     185    return 0;
     186}
     187
     188
    177189sub _mark_docs_for_deletion
    178190{
    179     my ($archive_info,$deleted_files_ref,$archivedir,$verbosity,$mode_text) = @_;
     191    my ($archive_info,$block_hash,$deleted_files,$archivedir,$verbosity,$mode_text) = @_;
    180192
    181193    my $doc_db = "archiveinf-doc.gdb";
     
    186198
    187199    # record files marked for deletion in arcinfo
    188     foreach my $file (@$deleted_files_ref) {
     200    foreach my $file (@$deleted_files) {
    189201    # use 'archiveinf-src' GDBM file to look up all the OIDs
    190202    # this file is used in (note in most cases, it's just one OID)
     
    194206    foreach my $oid (@$oids) {
    195207
    196         # Find out if it's an assoc file or main doc
     208        # Find out if it's a main doc, assoc file, or metadata
    197209
    198210        my $doc_rec = GDBMUtils::gdbmRecordToHash($arcinfo_doc_filename,$oid);
    199         if ($doc_rec->{'src-file'}->[0] eq $file) {
    200         # It's the main doc
    201         # => mark it for deletion
    202    
    203         if ($verbosity>1) {
    204             print STDERR "$oid ($file) marked to be $mode_text on next buildcol.pl\n";
    205         }
    206         $archive_info->set_status_info($oid,"D");
    207 
    208         my $val = &GDBMUtils::gdbmDatabaseGet($arcinfo_doc_filename,$oid);
    209         my ($index_status) = ($val =~ m/^<index-status>(.*)$/m);
    210         if ($index_status ne "D") {
    211             $val =~ s/^<index-status>(.*)$/<index-status>D/m;
    212             &GDBMUtils::gdbmDatabaseSet($arcinfo_doc_filename,$oid,$val);
    213         }
    214         }
    215         else {
     211
     212        if (is_assoc_file($file,$doc_rec)) {
    216213        # assoc file => mark it for re-indexing (safest thing to do)
    217214        my $curr_status = $archive_info->get_status_info($oid);
     
    229226            &GDBMUtils::gdbmDatabaseSet($arcinfo_doc_filename,$oid,$val);
    230227        }
     228        GDBMUtils::gdbmDatabaseRemove($arcinfo_src_filename,$file);
    231229        }
    232 
    233         GDBMUtils::gdbmDatabaseRemove($arcinfo_src_filename,$file);
     230        else {
     231        # either src-file or metadata.xml file linking to src-file
     232
     233        my $src_file;
     234
     235        if ($doc_rec->{'src-file'}->[0] ne $file) {
     236            # it's a metadata file attached to this OID
     237            # => workout the src-file it matches to
     238
     239            $src_file = $doc_rec->{'src-file'}->[0];
     240           
     241            my $src_filename = $src_file;
     242            if (!&util::filename_is_absolute($src_file)) {
     243            $src_filename = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},$src_file);
     244            }
     245           
     246            $block_hash->{'reindex_files'}->{$src_filename} = 1;
     247        }
     248        else {
     249            # It's the main doc
     250            # => make it the target and mark it for deletion
     251            $src_file = $file;
     252        }
     253
     254        # Whether the main file directly or indirectly, mark for deletion/reindex
     255
     256        if ($verbosity>1) {
     257            print STDERR "$oid ($src_file) marked to be $mode_text on next buildcol.pl\n";
     258        }
     259        $archive_info->set_status_info($oid,"D");
     260
     261        my $val = &GDBMUtils::gdbmDatabaseGet($arcinfo_doc_filename,$oid);
     262        my ($index_status) = ($val =~ m/^<index-status>(.*)$/m);
     263        if ($index_status ne "D") {
     264            $val =~ s/^<index-status>(.*)$/<index-status>D/m;
     265            &GDBMUtils::gdbmDatabaseSet($arcinfo_doc_filename,$oid,$val);
     266        }
     267
     268        GDBMUtils::gdbmDatabaseRemove($arcinfo_src_filename,$src_file);
     269        }
     270
    234271    }
    235272    }
     
    245282sub mark_docs_for_reindex
    246283{
    247     my ($archive_info,$existing_files_ref,$archivedir,$verbosity) = @_;
     284    my ($archive_info,$block_hash,$archivedir,$verbosity) = @_;
    248285
    249286    # Reindexing is accomplished by deleting the previously indexed
     
    260297    # deals with it appropriately.
    261298
     299    my @existing_files = sort keys %{$block_hash->{'existing_files'}};
     300
    262301    my $doc_db = "archiveinf-doc.gdb";
    263302    my $arcinfo_doc_filename = &util::filename_cat ($archivedir, $doc_db);
    264303
    265 
    266304    my $archiveinf_timestamp = -M $arcinfo_doc_filename;
    267305
    268     my $reindex_files_ref = [];
    269 
    270     foreach my $existing_filename (@$existing_files_ref) {
     306    my $reindex_files = [];
     307
     308    foreach my $existing_filename (@existing_files) {
    271309   
    272310    if (-M $existing_filename < $archiveinf_timestamp) {
     
    281319###     print STDERR "**** Deleting existing file: $existing_file\n";
    282320
    283         push(@$reindex_files_ref,$existing_file);
     321        push(@$reindex_files,$existing_file);
     322        $block_hash->{'reindex_files'}->{$existing_filename} = 1;
    284323    }
    285324
    286325    }
    287326   
    288     _mark_docs_for_deletion($archive_info,$reindex_files_ref,$archivedir,$verbosity, "reindex");
    289 
    290     return @$reindex_files_ref;
     327    _mark_docs_for_deletion($archive_info,$block_hash,$reindex_files,$archivedir,$verbosity, "reindex");
     328
    291329}
    292330
Note: See TracChangeset for help on using the changeset viewer.