Changeset 20578


Ignore:
Timestamp:
2009-09-10T10:51:48+12:00 (15 years ago)
Author:
davidb
Message:

Changes to support the new incremental_mode variable, and to track metadata.xml files more closely so that incremental building works when a document is first built without any metadata attached and then, in a subsequent build, has metadata added from a source such as metadata.xml.

Location:
gsdl/trunk/perllib
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/inexport.pm

    r19789 r20578  
    7979sub new_vs_old_import_diff
    8080{
    81     my ($archive_info,$block_hash,$importdir) = @_;
     81    my ($archive_info,$block_hash,$importdir,$archivedir,$verbosity,$incremental_mode) = @_;
    8282
    8383    # First convert all files to absolute form
     
    108108
    109109    # entry in 'all_files' is moved to either 'existing_files',
    110     # 'deleted_files', or 'new_files'
     110    # 'deleted_files', 'new_files', or 'new_metadata_files'
    111111
    112112    if (!&util::filename_is_absolute($curr_file)) {
     
    129129    # figure out if new file or not
    130130    if (defined $full_prev_all_files->{$full_curr_file}) {
     131       
     132        if ($incremental_mode eq "all") {
     133
     134        # had it before
     135        $block_hash->{'existing_files'}->{$full_curr_file} = 1;
     136       
     137        # Now remove it, so by end of loop only the files
     138        # that need deleting are left
    131139       
    132         # had it before
    133         $block_hash->{'existing_files'}->{$full_curr_file} = 1;
    134 
    135         # Now remove it, so by end of loop only the files
    136         # that need deleting are left
    137 
    138         delete $full_prev_all_files->{$full_curr_file};
    139         }
     140        delete $full_prev_all_files->{$full_curr_file};
     141        }
     142        else {
     143        # Warning in "onlyadd" mode, but had it before!
     144        print STDERR "Warning: File $full_curr_file previously imported.\n";
     145        print STDERR "         Treating as new file\n";
     146
     147        $block_hash->{'new_files'}->{$full_curr_file} = 1;
     148        delete $full_prev_all_files->{$full_curr_file};
     149        }
     150    }
    140151    else {
    141         $block_hash->{'new_files'}->{$full_curr_file} = 1;
    142     }
     152        if ($block_hash->{'metadata_files'}->{$full_curr_file}) {
     153        # the new file is the special sort of file greenstone uses
     154        # to attach metadata to src documents
     155        # i.e metadata.xml
     156        # (but note, the filename used is not constrained in
     157        # Greenstone to always be this)
     158
     159##      print STDERR "***** Detected new metadata file: $full_curr_file\n";
     160        $block_hash->{'new_metadata_files'}->{$full_curr_file} = 1;
     161        }
     162        else {
     163        $block_hash->{'new_files'}->{$full_curr_file} = 1;
     164        }
     165    }
     166
    143167   
    144168    delete $block_hash->{'all_files'}->{$curr_file};
    145169    }
    146170
     171
     172    # Deal with complication of new metadata.xml files by forcing
     173    # everything from this point down in the file hierarchy to
     174    # be freshly imported. 
     175    #
     176    # This may mean files that have not changed are reindexed, but does
     177    # guarantee by the end of processing all new metadata is correctly
     178    # associated with the relevant document(s).
     179
     180    foreach my $new_mdf (keys %{$block_hash->{'new_metadata_files'}}) {
     181    my ($fileroot,$situated_dir,$ext) = fileparse($new_mdf, "\\.[^\\.]+\$");
     182
     183    $situated_dir =~ s/[\\\/]+$//; # remove tailing slashes
     184
     185    # Go through existing_files, and mark anything that is contained
     186    # within 'situated_dir' to be reindexed (in case some of the metadata
     187    # attaches to one of these files)
     188
     189    my $reindex_files = [];
     190
     191    foreach my $existing_f (keys %{$block_hash->{'existing_files'}}) {
     192        # need to protect windows slash \ in regular expression?
     193
     194        if ($existing_f =~ m/^$situated_dir/) {
     195        push(@$reindex_files,$existing_f);
     196        $block_hash->{'reindex_files'}->{$existing_f} = 1;
     197
     198        }
     199    }
     200
     201    # Reindexing is accomplished by putting them in the list for reindexing (line above)
     202    # and then tagging the arcinfo version as to be deleted.
     203
     204    _mark_docs_for_deletion($archive_info,$block_hash,$reindex_files,$archivedir,$verbosity, "reindex");
     205   
     206    # metadata file needs to be in new_files list so parsed by MetadataXMLPlug
     207    # (or equivalent)
     208    $block_hash->{'new_files'}->{$new_mdf} = 1;
     209
     210    }
     211
     212   
    147213    # By this point full_prev_all_files contains the files
    148214    # mentioned in archiveinf-src.db but are not in the 'import'
     
    153219    #
    154220    # In building the final list of files to delete, we test to see if
    155     # it exists on the filesystem and if it does (unusual for a file
    156     # that's allegedly deleted!), supress it from going into the final
    157     # list
     221    # it exists on the filesystem and if it does (unusual for a "normal"
     222    # file in import, but possible in the case of 'tmp' files),
     223    # suppress it from going into the final list
    158224
    159225    my $collectdir = $ENV{'GSDLCOLLECTDIR'};
     
    174240      }
    175241      } @deleted_files;
     242
     243
     244
    176245}
    177246
  • gsdl/trunk/perllib/plugin.pm

    r19497 r20578  
    146146sub load_plugins {
    147147    my ($plugin_list) = shift @_;
    148     my $incremental;
    149     ($verbosity, $outhandle, $failhandle, $globaloptions, $incremental) = @_; # globals
     148    my $incremental_mode;
     149    ($verbosity, $outhandle, $failhandle, $globaloptions, $incremental_mode) = @_; # globals
    150150    my @plugin_objects = ();
    151     $incremental = 0 unless (defined $incremental && $incremental == 1);
    152151    $verbosity = 2 unless defined $verbosity;
    153152    $outhandle = 'STDERR' unless defined $outhandle;
     
    184183    $plugobj->init($verbosity, $outhandle, $failhandle);
    185184   
    186     $plugobj->set_incremental($incremental);
     185    $plugobj->set_incremental($incremental_mode);
    187186
    188187    # add this object to the list
  • gsdl/trunk/perllib/scriptutil.pm

    r17143 r20578  
    3535sub check_removeold_and_keepold {
    3636
    37     my ($removeold, $keepold, $incremental, $dir, $collectcfg) = @_;
     37    my ($removeold, $keepold, $incremental, $dir, $collectcfg) = @_;   
     38
     39    # Incremental mode may be set to "none", "onlyadd" or "all"
     40    # depending on status of -keepold and -incremental flags
     41    my $incremental_mode = ($incremental) ? "all" : "none";
    3842
    3943    if (($keepold && $removeold) || ($incremental && $removeold) ) {
    4044    gsprintf(STDERR, "{scripts.both_old_options}\n", $dir);
    4145    sleep(3); #just in case
    42     return (1,0);
     46    return (1,0,0,"none");
    4347   
    4448    }
     
    5054    } elsif (defined $collectcfg->{'keepold'} && $collectcfg->{'keepold'} =~ /^true$/i) {
    5155        $keepold = 1;
     56        $incremental_mode = "onlyadd";
    5257    } elsif (defined $collectcfg->{'incremental'} && $collectcfg->{'incremental'} =~ /^true$/i) {
    5358        $incremental = 1;
     59        $incremental_mode = "all";
    5460    }
    5561    }
     
    5864    gsprintf(STDERR, "{scripts.no_old_options} \n", $dir);
    5965    sleep(3); #just in case
    60     return (1,0);
     66    return (1,0,0,"none");
    6167    }
    6268   
     
    6571    $keepold = 1;
    6672    }
    67     return ($removeold, $keepold, $incremental);
     73    return ($removeold, $keepold, $incremental, $incremental_mode);
    6874
    6975}
Note: See TracChangeset for help on using the changeset viewer.