Changeset 20776


Ignore:
Timestamp:
2009-10-05T15:50:20+13:00 (13 years ago)
Author:
kjdon
Message:

In the middle of fixing small bugs in incremental build. Lots of changes here, not sure what they are all for. One important one - if a metadata file has changed, then we need to reimport all files (same as if the metadata file was new).

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/inexport.pm

    r20769 r20776  
    8080{
    8181    my ($archive_info,$block_hash,$importdir,$archivedir,$verbosity,$incremental_mode) = @_;
     82
     83    # in this method, we want to know if metadata files are modified or not.
     84    my $doc_db = "archiveinf-doc.gdb";
     85    my $arcinfo_doc_filename = &util::filename_cat ($archivedir, $doc_db);
     86
     87    my $archiveinf_timestamp = -M $arcinfo_doc_filename;
    8288
    8389    # First convert all files to absolute form
     
    108114
    109115    # entry in 'all_files' is moved to either 'existing_files',
    110     # 'deleted_files', 'new_files', or 'new_metadata_files'
     116    # 'deleted_files', 'new_files', or 'new_or_modified_metadata_files'
    111117
    112118    if (!&util::filename_is_absolute($curr_file)) {
     
    115121    }
    116122
    117     if (defined $block_hash->{'file_blocks'}->{$full_curr_file}) {
    118         # If in block list, we want to ignore it
    119         delete $block_hash->{'all_files'}->{$curr_file};
    120 
    121         if (defined $full_prev_all_files->{$full_curr_file}) {
    122         # also make sure it is gone from 'previous' list so
    123         # not mistaken for a file that needs to be deleted
    124         delete $full_prev_all_files->{$full_curr_file};
    125         }
    126         next;
    127     }
    128 
    129123    # figure out if new file or not
    130124    if (defined $full_prev_all_files->{$full_curr_file}) {
     125        # delete it so that only files that need deleting are left
     126        delete $full_prev_all_files->{$full_curr_file};
     127       
     128        # had it before. is it a metadata file?
     129        if ($block_hash->{'metadata_files'}->{$full_curr_file}) {
    131130       
    132         if ($incremental_mode eq "all") {
    133 
    134         # had it before
    135         $block_hash->{'existing_files'}->{$full_curr_file} = 1;
    136        
    137         # Now remove it, so by end of loop only the files
    138         # that need deleting are left
    139        
    140         delete $full_prev_all_files->{$full_curr_file};
     131        # is it modified??
     132        if (-M $full_curr_file < $archiveinf_timestamp) {
     133            print STDERR "*** Detected a modified metadata file: $full_curr_file\n" if $verbosity > 2;
     134            # it's newer than the last build
     135            $block_hash->{'new_or_modified_metadata_files'}->{$full_curr_file} = 1;
     136        }
    141137        }
    142138        else {
    143         # Warning in "onlyadd" mode, but had it before!
    144         print STDERR "Warning: File $full_curr_file previously imported.\n";
    145         print STDERR "         Treating as new file\n";
    146 
    147         $block_hash->{'new_files'}->{$full_curr_file} = 1;
    148         delete $full_prev_all_files->{$full_curr_file};
     139        if ($incremental_mode eq "all") {
     140           
     141            # had it before
     142            $block_hash->{'existing_files'}->{$full_curr_file} = 1;
     143           
     144        }
     145        else {
     146            # Warning in "onlyadd" mode, but had it before!
     147            print STDERR "Warning: File $full_curr_file previously imported.\n";
     148            print STDERR "         Treating as new file\n";
     149           
     150            $block_hash->{'new_files'}->{$full_curr_file} = 1;
     151           
     152        }
    149153        }
    150154    }
     
    157161        # Greenstone to always be this)
    158162
    159 ##      print STDERR "***** Detected new metadata file: $full_curr_file\n";
    160         $block_hash->{'new_metadata_files'}->{$full_curr_file} = 1;
     163        print STDERR "***** Detected new metadata file: $full_curr_file\n" if $verbosity > 2;
     164        $block_hash->{'new_or_modified_metadata_files'}->{$full_curr_file} = 1;
    161165        }
    162166        else {
     
    170174
    171175
    172     # Deal with complication of new metadata.xml files by forcing
     176    # Deal with complication of new or modified metadata files by forcing
    173177    # everything from this point down in the file hierarchy to
    174178    # be freshly imported. 
     
    178182    # associated with the relevant document(s).
    179183
    180     foreach my $new_mdf (keys %{$block_hash->{'new_metadata_files'}}) {
     184    foreach my $new_mdf (keys %{$block_hash->{'new_or_modified_metadata_files'}}) {
    181185    my ($fileroot,$situated_dir,$ext) = fileparse($new_mdf, "\\.[^\\.]+\$");
    182186
     
    199203    }
    200204
    201     # Reindexing is accomplished by putting them in th list for reindexing (line above)
     205    # Reindexing is accomplished by putting them in the list for reindexing (line above)
    202206    # and then tagging the arcinfo version as to be deleted.
    203207
     
    279283    my $src_rec = GDBMUtils::gdbmRecordToHash($arcinfo_src_filename,$file);
    280284    my $oids = $src_rec->{'oid'};
     285    my $file_record_deleted = 0;
    281286    foreach my $oid (@$oids) {
    282287        # Find out if it's a main doc, assoc file, or metadata
    283288
    284289        my $doc_rec = GDBMUtils::gdbmRecordToHash($arcinfo_doc_filename,$oid);
     290        my $doc_source_file = $doc_rec->{'src-file'}->[0];
     291        if (!&util::filename_is_absolute($doc_source_file)) {
     292        $doc_source_file = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},$doc_source_file);
     293        }
    285294
    286295        if (is_assoc_file($file,$doc_rec)) {
     296        ## -- kjdon - here, do same thing as for metadata file??
     297        ## mark source for reimport??
    287298        # assoc file => mark it for re-indexing (safest thing to do)
    288299        my $curr_status = $archive_info->get_status_info($oid);
    289300
    290 
     301        # mark source doc for reimport as one of its assoc files has been changed or deleted
     302        $block_hash->{'reindex_files'}->{$doc_source_file} = 1;
    291303        if (defined($curr_status) && (($curr_status ne "D") && ($curr_status ne "R"))) {
    292304            if ($verbosity > 1) {
     
    300312            &GDBMUtils::gdbmDatabaseSet($arcinfo_doc_filename,$oid,$val);
    301313        }
    302         GDBMUtils::gdbmDatabaseRemove($arcinfo_src_filename,$file);
     314        GDBMUtils::gdbmDatabaseRemove($arcinfo_src_filename,$file) unless $file_record_deleted;
     315        $file_record_deleted = 1;
    303316        }
    304317        else {
    305318        # either src-file or metadata.xml file linking to src-file
    306 
    307         my $src_file;
     319        # actually, metadata files should not get here, as they are
     320        # processed earlier
    308321
    309322        if ($doc_rec->{'src-file'}->[0] ne $file) {
    310323            # it's a metadata file attached to this OID
    311             # => workout the src-file it matches to
    312 
    313             $src_file = $doc_rec->{'src-file'}->[0];
     324            # => reindex the src-file it matches to
     325
     326            $block_hash->{'reindex_files'}->{$doc_source_file} = 1;
     327
     328            # remove the metadata file from the src-database
    314329           
    315             my $src_filename = $src_file;
    316             if (!&util::filename_is_absolute($src_file)) {
    317             $src_filename = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},$src_file);
    318             }
    319            
    320             $block_hash->{'reindex_files'}->{$src_filename} = 1;
    321 
    322             # remove the metadata file from the src-database
    323             GDBMUtils::gdbmDatabaseRemove($arcinfo_src_filename,$file);
    324         }
    325         else {
    326             # It's the main doc
    327             # => make it the target and mark it for deletion
    328             $src_file = $file;
     330            GDBMUtils::gdbmDatabaseRemove($arcinfo_src_filename,$file) unless $file_record_deleted;
     331            $file_record_deleted = 1;
    329332        }
    330333
     
    336339        if ($index_status ne "D") {
    337340            if ($verbosity>1) {
    338             print STDERR "$oid ($src_file) marked to be $mode_text on next buildcol.pl\n";
     341            print STDERR "$oid ($doc_source_file) marked to be $mode_text on next buildcol.pl\n";
    339342            }
    340343            $archive_info->set_status_info($oid,"D");
     
    343346            &GDBMUtils::gdbmDatabaseSet($arcinfo_doc_filename,$oid,$val);
    344347
    345             GDBMUtils::gdbmDatabaseRemove($arcinfo_src_filename,$src_file);
     348            GDBMUtils::gdbmDatabaseRemove($arcinfo_src_filename,$doc_source_file);
    346349        }
    347350
Note: See TracChangeset for help on using the changeset viewer.