Changeset 19498

Show
Ignore:
Timestamp:
18.05.2009 11:00:51 (10 years ago)
Author:
davidb
Message:

Supporting routines that exploit the new 'metafiles' structures, introduced to track which metadata.xml file a piece of metadata came from

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/inexport.pm

    r19266 r19498  
    125125    } 
    126126 
    127     # figure of if new file or not 
     127    # figure out if new file or not 
    128128    if (defined $full_prev_all_files->{$full_curr_file}) { 
    129129         
     
    152152    # In building the final list of files to delete, we test to see if 
    153153    # it exists on the filesystem and if it does (unusual for a file 
    154     # that's allegedly deleted!) , supress it from going into the final 
     154    # that's allegedly deleted!), suppress it from going into the final 
    155155    # list 
    156156 
     
    175175 
    176176 
     177sub is_assoc_file 
     178{ 
     179    my ($file,$doc_rec) = @_; 
     180 
     181    foreach my $af (@{$doc_rec->{'assoc-file'}}) { 
     182    return 1 if ($af eq $file); 
     183    } 
     184 
     185    return 0; 
     186} 
     187 
     188 
    177189sub _mark_docs_for_deletion 
    178190{ 
    179     my ($archive_info,$deleted_files_ref,$archivedir,$verbosity,$mode_text) = @_; 
     191    my ($archive_info,$block_hash,$deleted_files,$archivedir,$verbosity,$mode_text) = @_; 
    180192 
    181193    my $doc_db = "archiveinf-doc.gdb"; 
     
    186198 
    187199    # record files marked for deletion in arcinfo 
    188     foreach my $file (@$deleted_files_ref) { 
     200    foreach my $file (@$deleted_files) { 
    189201    # use 'archiveinf-src' GDBM file to look up all the OIDs 
    190202    # this file is used in (note in most cases, it's just one OID) 
     
    194206    foreach my $oid (@$oids) { 
    195207 
    196         # Find out if it's an assoc file or main doc 
     208        # Find out if it's a main doc, assoc file, or metadata 
    197209 
    198210        my $doc_rec = GDBMUtils::gdbmRecordToHash($arcinfo_doc_filename,$oid); 
    199         if ($doc_rec->{'src-file'}->[0] eq $file) { 
    200         # It's the main doc 
    201         # => mark it for deletion 
    202      
    203         if ($verbosity>1) { 
    204             print STDERR "$oid ($file) marked to be $mode_text on next buildcol.pl\n"; 
    205         } 
    206         $archive_info->set_status_info($oid,"D"); 
    207  
    208         my $val = &GDBMUtils::gdbmDatabaseGet($arcinfo_doc_filename,$oid); 
    209         my ($index_status) = ($val =~ m/^<index-status>(.*)$/m); 
    210         if ($index_status ne "D") { 
    211             $val =~ s/^<index-status>(.*)$/<index-status>D/m; 
    212             &GDBMUtils::gdbmDatabaseSet($arcinfo_doc_filename,$oid,$val); 
    213         } 
    214         } 
    215         else { 
     211 
     212        if (is_assoc_file($file,$doc_rec)) { 
    216213        # assoc file => mark it for re-indexing (safest thing to do) 
    217214        my $curr_status = $archive_info->get_status_info($oid); 
     
    229226            &GDBMUtils::gdbmDatabaseSet($arcinfo_doc_filename,$oid,$val); 
    230227        } 
     228        GDBMUtils::gdbmDatabaseRemove($arcinfo_src_filename,$file); 
    231229        } 
    232  
    233         GDBMUtils::gdbmDatabaseRemove($arcinfo_src_filename,$file); 
     230        else { 
     231        # either src-file or metadata.xml file linking to src-file 
     232 
     233        my $src_file; 
     234 
     235        if ($doc_rec->{'src-file'}->[0] ne $file) { 
     236            # it's a metadata file attached to this OID 
     237            # => work out the src-file it matches to 
     238 
     239            $src_file = $doc_rec->{'src-file'}->[0]; 
     240             
     241            my $src_filename = $src_file; 
     242            if (!&util::filename_is_absolute($src_file)) { 
     243            $src_filename = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},$src_file); 
     244            } 
     245             
     246            $block_hash->{'reindex_files'}->{$src_filename} = 1; 
     247        } 
     248        else { 
     249            # It's the main doc 
     250            # => make it the target and mark it for deletion 
     251            $src_file = $file; 
     252        } 
     253 
     254        # Whether the main file directly or indirectly, mark for deletion/reindex 
     255 
     256        if ($verbosity>1) { 
     257            print STDERR "$oid ($src_file) marked to be $mode_text on next buildcol.pl\n"; 
     258        } 
     259        $archive_info->set_status_info($oid,"D"); 
     260 
     261        my $val = &GDBMUtils::gdbmDatabaseGet($arcinfo_doc_filename,$oid); 
     262        my ($index_status) = ($val =~ m/^<index-status>(.*)$/m); 
     263        if ($index_status ne "D") { 
     264            $val =~ s/^<index-status>(.*)$/<index-status>D/m; 
     265            &GDBMUtils::gdbmDatabaseSet($arcinfo_doc_filename,$oid,$val); 
     266        } 
     267 
     268        GDBMUtils::gdbmDatabaseRemove($arcinfo_src_filename,$src_file); 
     269        } 
     270 
    234271    } 
    235272    } 
     
    245282sub mark_docs_for_reindex 
    246283{ 
    247     my ($archive_info,$existing_files_ref,$archivedir,$verbosity) = @_; 
     284    my ($archive_info,$block_hash,$archivedir,$verbosity) = @_; 
    248285 
    249286    # Reindexing is accomplished by deleting the previously indexed 
     
    260297    # deals with it appropriately. 
    261298 
     299    my @existing_files = sort keys %{$block_hash->{'existing_files'}}; 
     300 
    262301    my $doc_db = "archiveinf-doc.gdb"; 
    263302    my $arcinfo_doc_filename = &util::filename_cat ($archivedir, $doc_db); 
    264303 
    265  
    266304    my $archiveinf_timestamp = -M $arcinfo_doc_filename; 
    267305 
    268     my $reindex_files_ref = []; 
    269  
    270     foreach my $existing_filename (@$existing_files_ref) { 
     306    my $reindex_files = []; 
     307 
     308    foreach my $existing_filename (@existing_files) { 
    271309     
    272310    if (-M $existing_filename < $archiveinf_timestamp) { 
     
    281319###     print STDERR "**** Deleting existing file: $existing_file\n"; 
    282320 
    283         push(@$reindex_files_ref,$existing_file); 
     321        push(@$reindex_files,$existing_file); 
     322        $block_hash->{'reindex_files'}->{$existing_filename} = 1; 
    284323    } 
    285324 
    286325    } 
    287326     
    288     _mark_docs_for_deletion($archive_info,$reindex_files_ref,$archivedir,$verbosity, "reindex"); 
    289  
    290     return @$reindex_files_ref; 
     327    _mark_docs_for_deletion($archive_info,$block_hash,$reindex_files,$archivedir,$verbosity, "reindex"); 
     328 
    291329} 
    292330