Changeset 20776

Show
Ignore:
Timestamp:
05.10.2009 15:50:20 (10 years ago)
Author:
kjdon
Message:

In the middle of fixing small bugs in the incremental build. Lots of changes here; not sure what they are all for. One important one: if a metadata file has changed, then we need to reimport all files (the same as if the metadata file were new).

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/inexport.pm

    r20769 r20776  
    8080{ 
    8181    my ($archive_info,$block_hash,$importdir,$archivedir,$verbosity,$incremental_mode) = @_; 
     82 
     83    # in this method, we want to know if metadata files are modified or not. 
     84    my $doc_db = "archiveinf-doc.gdb"; 
     85    my $arcinfo_doc_filename = &util::filename_cat ($archivedir, $doc_db); 
     86 
     87    my $archiveinf_timestamp = -M $arcinfo_doc_filename; 
    8288 
    8389    # First convert all files to absolute form 
     
    108114 
    109115    # entry in 'all_files' is moved to either 'existing_files',  
    110     # 'deleted_files', 'new_files', or 'new_metadata_files' 
     116    # 'deleted_files', 'new_files', or 'new_or_modified_metadata_files' 
    111117 
    112118    if (!&util::filename_is_absolute($curr_file)) { 
     
    115121    } 
    116122 
    117     if (defined $block_hash->{'file_blocks'}->{$full_curr_file}) { 
    118         # If in block list, we want to ignore it 
    119         delete $block_hash->{'all_files'}->{$curr_file}; 
    120  
    121         if (defined $full_prev_all_files->{$full_curr_file}) { 
    122         # also make sure it is gone from 'previous' list so 
    123         # not mistaken for a file that needs to be deleted 
    124         delete $full_prev_all_files->{$full_curr_file}; 
    125         } 
    126         next; 
    127     } 
    128  
    129123    # figure out if new file or not 
    130124    if (defined $full_prev_all_files->{$full_curr_file}) { 
     125        # delete it so that only files that need deleting are left 
     126        delete $full_prev_all_files->{$full_curr_file}; 
     127         
     128        # had it before. is it a metadata file? 
     129        if ($block_hash->{'metadata_files'}->{$full_curr_file}) { 
    131130         
    132         if ($incremental_mode eq "all") { 
    133  
    134         # had it before 
    135         $block_hash->{'existing_files'}->{$full_curr_file} = 1; 
    136          
    137         # Now remove it, so by end of loop only the files 
    138         # that need deleting are left 
    139          
    140         delete $full_prev_all_files->{$full_curr_file}; 
     131        # is it modified?? 
     132        if (-M $full_curr_file < $archiveinf_timestamp) { 
     133            print STDERR "*** Detected a modified metadata file: $full_curr_file\n" if $verbosity > 2; 
     134            # its newer than last build 
     135            $block_hash->{'new_or_modified_metadata_files'}->{$full_curr_file} = 1; 
     136        } 
    141137        } 
    142138        else { 
    143         # Warning in "onlyadd" mode, but had it before! 
    144         print STDERR "Warning: File $full_curr_file previously imported.\n"; 
    145         print STDERR "         Treating as new file\n"; 
    146  
    147         $block_hash->{'new_files'}->{$full_curr_file} = 1; 
    148         delete $full_prev_all_files->{$full_curr_file}; 
     139        if ($incremental_mode eq "all") { 
     140             
     141            # had it before 
     142            $block_hash->{'existing_files'}->{$full_curr_file} = 1; 
     143             
     144        } 
     145        else { 
     146            # Warning in "onlyadd" mode, but had it before! 
     147            print STDERR "Warning: File $full_curr_file previously imported.\n"; 
     148            print STDERR "         Treating as new file\n"; 
     149             
     150            $block_hash->{'new_files'}->{$full_curr_file} = 1; 
     151             
     152        } 
    149153        } 
    150154    } 
     
    157161        # Greenstone to always be this) 
    158162 
    159 ##      print STDERR "***** Detected new metadata file: $full_curr_file\n"; 
    160         $block_hash->{'new_metadata_files'}->{$full_curr_file} = 1; 
     163        print STDERR "***** Detected new metadata file: $full_curr_file\n" if $verbosity > 2; 
     164        $block_hash->{'new_or_modified_metadata_files'}->{$full_curr_file} = 1; 
    161165        } 
    162166        else { 
     
    170174 
    171175 
    172     # Deal with complication of new metadata.xml files by forcing 
     176    # Deal with complication of new or modified metadata files by forcing 
    173177    # everything from this point down in the file hierarchy to 
    174178    # be freshly imported.   
     
    178182    # associated with the relevant document(s). 
    179183 
    180     foreach my $new_mdf (keys %{$block_hash->{'new_metadata_files'}}) { 
     184    foreach my $new_mdf (keys %{$block_hash->{'new_or_modified_metadata_files'}}) { 
    181185    my ($fileroot,$situated_dir,$ext) = fileparse($new_mdf, "\\.[^\\.]+\$"); 
    182186 
     
    199203    } 
    200204 
    201     # Reindexing is accomplished by putting them in th list for reindexing (line above) 
     205    # Reindexing is accomplished by putting them in the list for reindexing (line above) 
    202206    # and then tagging the arcinfo version as to be deleted. 
    203207 
     
    279283    my $src_rec = GDBMUtils::gdbmRecordToHash($arcinfo_src_filename,$file); 
    280284    my $oids = $src_rec->{'oid'}; 
     285    my $file_record_deleted = 0; 
    281286    foreach my $oid (@$oids) { 
    282287        # Find out if it's a main doc, assoc file, or metadata 
    283288 
    284289        my $doc_rec = GDBMUtils::gdbmRecordToHash($arcinfo_doc_filename,$oid); 
     290        my $doc_source_file = $doc_rec->{'src-file'}->[0]; 
     291        if (!&util::filename_is_absolute($doc_source_file)) { 
     292        $doc_source_file = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},$doc_source_file); 
     293        } 
    285294 
    286295        if (is_assoc_file($file,$doc_rec)) { 
     296        ## -- kjdon - here, do same thing as for metadata file?? 
     297        ## mark source for reimport?? 
    287298        # assoc file => mark it for re-indexing (safest thing to do) 
    288299        my $curr_status = $archive_info->get_status_info($oid); 
    289300 
    290  
     301        # mark source doc for reimport as one of its assoc files has changed or deleted 
     302        $block_hash->{'reindex_files'}->{$doc_source_file} = 1; 
    291303        if (defined($curr_status) && (($curr_status ne "D") && ($curr_status ne "R"))) { 
    292304            if ($verbosity > 1) { 
     
    300312            &GDBMUtils::gdbmDatabaseSet($arcinfo_doc_filename,$oid,$val); 
    301313        } 
    302         GDBMUtils::gdbmDatabaseRemove($arcinfo_src_filename,$file); 
     314        GDBMUtils::gdbmDatabaseRemove($arcinfo_src_filename,$file) unless $file_record_deleted; 
     315        $file_record_deleted = 1; 
    303316        } 
    304317        else { 
    305318        # either src-file or metadata.xml file linking to src-file 
    306  
    307         my $src_file; 
     319        # actually, metadata files should not get here, as are  
     320        # processed earlier 
    308321 
    309322        if ($doc_rec->{'src-file'}->[0] ne $file) { 
    310323            # it's a metadata file attached to this OID 
    311             # => workout the src-file it matches to 
    312  
    313             $src_file = $doc_rec->{'src-file'}->[0]; 
     324            # => reindex the src-file it matches to 
     325 
     326            $block_hash->{'reindex_files'}->{$doc_source_file} = 1; 
     327 
     328            # remove the metadata file from the src-database 
    314329             
    315             my $src_filename = $src_file; 
    316             if (!&util::filename_is_absolute($src_file)) { 
    317             $src_filename = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},$src_file); 
    318             } 
    319              
    320             $block_hash->{'reindex_files'}->{$src_filename} = 1; 
    321  
    322             # remove the metadata file from the src-database 
    323             GDBMUtils::gdbmDatabaseRemove($arcinfo_src_filename,$file); 
    324         } 
    325         else { 
    326             # It's the main doc 
    327             # => make it the target and mark it for deletion 
    328             $src_file = $file; 
     330            GDBMUtils::gdbmDatabaseRemove($arcinfo_src_filename,$file) unless $file_record_deleted; 
     331            $file_record_deleted = 1; 
    329332        } 
    330333 
     
    336339        if ($index_status ne "D") { 
    337340            if ($verbosity>1) { 
    338             print STDERR "$oid ($src_file) marked to be $mode_text on next buildcol.pl\n"; 
     341            print STDERR "$oid ($doc_source_file) marked to be $mode_text on next buildcol.pl\n"; 
    339342            } 
    340343            $archive_info->set_status_info($oid,"D"); 
     
    343346            &GDBMUtils::gdbmDatabaseSet($arcinfo_doc_filename,$oid,$val); 
    344347 
    345             GDBMUtils::gdbmDatabaseRemove($arcinfo_src_filename,$src_file); 
     348            GDBMUtils::gdbmDatabaseRemove($arcinfo_src_filename,$doc_source_file); 
    346349        } 
    347350