Changeset 18441


Ignore:
Timestamp:
2009-02-01T14:36:22+13:00 (15 years ago)
Author:
davidb
Message:

Modifications for incremental building to support files that need to be deleted

Location:
gsdl/trunk/perllib
Files:
12 edited

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/arcinfo.pm

    r15889 r18441  
    4242use strict;
    4343
     44use dbutil;
    4445
    4546# File format read in: OID <tab> Filename <tab> Optional-Index-Status
     
    6061}
    6162
    62 sub load_info {
    63     my $self = shift (@_);
    64     my ($filename) = @_;
    65 
    66     $self->{'info'} = {};
    67  
     63sub _load_info_txt
     64{
     65    my $self = shift (@_);
     66    my ($filename) = @_;
     67
    6868    if (defined $filename && -e $filename) {
    6969    open (INFILE, $filename) ||
     
    8080    close (INFILE);
    8181    }
    82 }
    83 
    84 sub save_info {
     82
     83
     84}
     85
     86sub _load_info_gdbm
     87{
     88    my $self = shift (@_);
     89    my ($filename) = @_;
     90
     91    my $infodb_map = {};
     92
     93    &dbutil::read_infodb_file_gdbm($filename,$infodb_map);
     94
     95    foreach my $oid ( keys %$infodb_map ) {
     96    my $vals = $infodb_map->{$oid};
     97    # interested in doc-file and index-status
     98
     99    my ($doc_file) = ($vals=~/^<doc-file>(.*)$/m);
     100    my ($index_status) = ($vals=~/^<index-status>(.*)$/m);
     101    $self->add_info ($oid,$doc_file,$index_status);
     102    }
     103}
     104
     105sub load_info {
     106    my $self = shift (@_);
     107    my ($filename) = @_;
     108
     109    $self->{'info'} = {};
     110
     111    if ((defined $filename) && (-e $filename)) {
     112    if ($filename =~ m/\.inf$/) {
     113        $self->_load_info_txt($filename);
     114    }
     115    else {
     116        $self->_load_info_gdbm($filename);
     117    }
     118    }
     119}
     120
     121sub _load_filelist_gdbm
     122{
     123    my $self = shift (@_);
     124    my ($filename) = @_;
     125
     126    my $infodb_map = {};
     127
     128    &dbutil::read_infodb_keys_gdbm($filename,$infodb_map);
     129
     130    foreach my $file ( keys %$infodb_map ) {
     131    $self->{'import_filelist'}->{$file} = 1;
     132    }
     133}
     134
     135
     136sub load_import_filelist {
     137    my $self = shift (@_);
     138    my ($filename) = @_;
     139
     140    $self->{'import-filelist'} = {};
     141
     142    if ((defined $filename) && (-e $filename)) {
     143    if ($filename =~ m/\.inf$/) {
     144        # e.g. 'archives-src.inf' (which includes complete list of file
     145        # from last time import.pl was run)
     146        $self->_load_info_txt($filename);
     147    }
     148    else {
     149        $self->_load_filelist_gdbm($filename);
     150    }
     151    }
     152}
     153
     154sub _save_info_txt {
    85155    my $self = shift (@_);
    86156    my ($filename) = @_;
     
    97167    }
    98168    close (OUTFILE);
     169}
     170
     171sub _save_info_gdbm {
     172    my $self = shift (@_);
     173    my ($filename) = @_;
     174
     175    # Not the most efficient operation, but will do for now
     176
     177    # read it in
     178    my $infodb_map = {};
     179    &dbutil::read_infodb_file_gdbm($filename,$infodb_map);
     180
     181    # change index-status values
     182    foreach my $info (@{$self->get_OID_list()}) {
     183    if (defined $info) {
     184        my ($oid,$doc_file,$index_status) = @$info;
     185        if (defined $infodb_map->{$oid}) {
     186        my $vals_ref = \$infodb_map->{$oid};
     187        $$vals_ref =~ s/^<index-status>(.*)$/<index-status>$index_status/m;
     188        }
     189        else {
     190        print STDERR "Warning: $filename does not have key $oid\n";
     191        }
     192    }
     193    }
     194
     195
     196    # write out again
     197    my $infodb_handle = &dbutil::open_infodb_write_handle_gdbm($filename);
     198    foreach my $oid ( keys %$infodb_map ) {
     199    # consider making the following a method in dbutil
     200    # e.g. write_infodb_rawentry_gdbm($infodb_handle,$oid,$vals);
     201
     202    # no need to escape, as $infodb_map->{$oid} hasn't been unescaped
     203    print $infodb_handle "[$oid]\n";
     204    print $infodb_handle $infodb_map->{$oid};
     205    print $infodb_handle '-' x 70, "\n";
     206    }
     207    &dbutil::close_infodb_write_handle_gdbm($infodb_handle);
     208
     209}
     210
     211sub save_info {
     212    my $self = shift (@_);
     213    my ($filename) = @_;
     214
     215    if ($filename =~ m/\.inf$/) {
     216    $self->_save_info_txt($filename);
     217    }
     218    else {
     219    $self->_save_info_gdbm($filename);
     220    }
    99221}
    100222
  • gsdl/trunk/perllib/basebuilder.pm

    r17573 r18441  
    470470    $build_cfg->{'numsections'} = $self->{'buildproc'}->get_num_sections();
    471471    $build_cfg->{'numbytes'} = $self->{'buildproc'}->get_num_bytes();
    472    
     472
    473473    # store the mapping between the index names and the directory names
    474474    # the index map is used to determine what indexes there are, so any that are not built should not be put into the map.
  • gsdl/trunk/perllib/cfgread.pm

    r16929 r18441  
    141141    my ($filename, $data, $stringexp, $arrayexp, $hashexp, $arrayarrayexp,
    142142    $hashhashexp) = @_;
    143    
     143
    144144    if (open (COLCFG, ">$filename")) {
    145145    foreach my $key (sort(keys(%$data))) {
  • gsdl/trunk/perllib/cpan/Image/Size.pm

    r13983 r18441  
    1818package Image::Size;
    1919
    20 require 5.6.0;
     20# require 5.6.0;
     21require 5.006_000;
    2122
    2223use strict;
  • gsdl/trunk/perllib/dbutil.pm

    r17476 r18441  
    320320  my $infodb_map = shift(@_);
    321321
    322   open (PIPEIN, "db2txt \"$infodb_file_path\" |") || die "couldn't open pipe from db2txt\n";
     322  open (PIPEIN, "db2txt \"$infodb_file_path\" |") || die "couldn't open pipe from db2txt \$infodb_file_path\"\n";
    323323  my $infodb_line = "";
    324324  my $infodb_key = "";
     
    340340      $infodb_value .= $infodb_line;
    341341    }
     342  }
     343
     344  close (PIPEIN);
     345}
     346
     347sub read_infodb_keys_gdbm
     348{
     349  my $infodb_file_path = shift(@_);
     350  my $infodb_map = shift(@_);
     351
     352  open (PIPEIN, "gdbmkeys \"$infodb_file_path\" |") || die "couldn't open pipe from gdbmkeys \$infodb_file_path\"\n";
     353  my $infodb_line = "";
     354  my $infodb_key = "";
     355  my $infodb_value = "";
     356  while (defined ($infodb_line = <PIPEIN>))
     357  {
     358      chomp $infodb_line; # remove end of line
     359
     360      $infodb_map->{$infodb_line} = 1;
    342361  }
    343362
  • gsdl/trunk/perllib/manifest.pm

    r17249 r18441  
    11package manifest;
    2 
    32
    43use XMLParser;
    54use strict;
    65no strict 'refs'; # allow filehandles to be variables and viceversa
    7 
    86
    97our $self;
  • gsdl/trunk/perllib/plugin.pm

    r17746 r18441  
    301301    die "\n";
    302302    }
    303    
     303
    304304    my $had_error = 0;
    305305    # pass this file by each of the plugins in turn until one
  • gsdl/trunk/perllib/plugins/ArchivesInfPlugin.pm

    r17738 r18441  
    2424###########################################################################
    2525
    26 # plugin which reads through an archives.inf file
    27 # (i.e. the file generated in the archives directory
    28 # when an import is done), processing each file it finds
     26# plugin which reads through an archives.inf (or GDBM equivalent,
     27# archiveinf-doc.{ldb,bdb} file (i.e. the file generated in the
     28# archives directory when an import is done), processing each file it
     29# finds
    2930
    3031package ArchivesInfPlugin;
     
    8687    my ($self) = @_;
    8788
     89    print STDERR "*** Running ArchivesInf deinit\n";
     90
    8891    my $archive_info = $self->{'archive_info'};
    8992
     
    9396        my $file_list = $archive_info->get_file_list();
    9497
    95     # change each file to "Been Indexed"
    96 
    9798    foreach my $subfile (@$file_list) {
    9899        my $doc_oid = $subfile->[1];
    99         # why do we get this when it is not used???
     100
    100101        my $index_status = $archive_info->get_status_info($doc_oid);
    101         $archive_info->set_status_info($doc_oid,"B");
     102        if ($index_status eq "D") {
     103        # delete
     104        $archive_info->delete_info($doc_oid);
     105        }
     106        elsif ($index_status =~ m/^(I|R)$/) {
     107        # mark as "been indexed"
     108        $archive_info->set_status_info($doc_oid,"B");
     109        }
    102110    }
    103111
     
    169177
    170178    # see if this has a archives information file within it
    171     my $archive_info_filename = &util::filename_cat($base_dir,$file,"archives.inf");
     179##    my $archive_info_filename = &util::filename_cat($base_dir,$file,"archives.inf");
     180    my $db_ext = &util::is_little_endian() ? ".ldb" : ".bdb";
     181    my $doc_db = "archiveinf-doc$db_ext";
     182    my $archive_info_filename = &util::filename_cat($base_dir,$file,$doc_db);
    172183
    173184    if (-e $archive_info_filename) {
     
    187198    # process each file
    188199    foreach my $subfile (@$file_list) {
     200
    189201        last if ($maxdocs != -1 && ($total_count + $count) >= $maxdocs);
    190202
     
    198210        if ($processor->is_incremental_capable() && $self->{'incremental'})
    199211        {
    200             # We don't need to process the file if it has already been built
     212            # Check to see if the file needs indexing
    201213        my $doc_oid = $subfile->[1];
    202214        my $index_status = $archive_info->get_status_info($doc_oid);
    203215        if ($index_status eq "B")
    204216        {
    205             # Don't process this file
     217            # Don't process this file as it has already been indexed
    206218            $process_file = 0;
    207219        }
  • gsdl/trunk/perllib/plugins/BasePlugin.pm

    r18404 r18441  
    433433    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file);
    434434
     435    if (!-d $filename_full_path) {
     436    $block_hash->{'all_files'}->{$file} = 1;
     437    }
     438
    435439    my $associate_tail_re = $self->{'associate_tail_re'};
    436440    if ((defined $associate_tail_re) && ($associate_tail_re ne "")) {
  • gsdl/trunk/perllib/plugins/DirectoryPlugin.pm

    r17738 r18441  
    131131
    132132    my $output_dir = $processor->getoutputdir();
    133     my $archives_inf = &util::filename_cat($output_dir,"archives.inf");
    134    
     133##  my $archives_inf = &util::filename_cat($output_dir,"archives.inf");
     134    my $db_ext = &util::is_little_endian() ? ".ldb" : ".bdb";
     135    my $doc_db = "archiveinf-doc$db_ext";
     136        my $archives_inf = &util::filename_cat($output_dir,$doc_db);
     137
    135138    if ( -e $archives_inf ) {
    136139        $self->{'inf_timestamp'} = -M $archives_inf;
     
    278281    return $directory_ok unless (defined $directory_ok && $directory_ok == 1);
    279282
     283    $block_hash->{'all_files'} = {} unless defined $block_hash->{'all_files'};
     284
    280285    $block_hash->{'file_blocks'} = {} unless defined $block_hash->{'file_blocks'};
    281286    $block_hash->{'shared_fileroot'} = {} unless defined $block_hash->{'shared_fileroot'};
     
    576581        if ($filename_timestamp > $inf_timestamp) {
    577582            # filename has been around for longer than inf
    578 #####           print $outhandle "**** Skipping $subfile\n";
     583            print $outhandle "**** Skipping $subfile\n" if ($verbosity >3);
    579584            next;
    580585        }
  • gsdl/trunk/perllib/plugouts/BasePlugout.pm

    r17884 r18441  
    404404    ##############################
    405405    $self->saveas($doc_obj,$doc_dir);
    406 ##    $self->archiveinf_gdbm($doc_obj,$doc_dir);
     406    $self->archiveinf_gdbm($doc_obj,$doc_dir);
    407407
    408408}
     
    660660    my $source_filename = $doc_obj->get_source_filename();
    661661
    662     my $oid_files = { 'src-file' => $source_filename,
     662    my $working_info = $self->{'output_info'};
     663    my $doc_info = $working_info->get_info($oid);
     664    my ($doc_file,$index_status) = @$doc_info;
     665
     666    my $oid_files = { 'doc-file' => $doc_file,
     667              'index-status' => $index_status,
     668              'src-file' => $source_filename,
    663669              'assoc-files' => [] };
    664670   
     
    701707
    702708    my $doc_db_text = "";
     709    $doc_db_text .= "<doc-file>$oid_files->{'doc-file'}\n";
     710    $doc_db_text .= "<index-status>$oid_files->{'index-status'}\n";
    703711    $doc_db_text .= "<src-file>$oid_files->{'src-file'}\n";
    704712    foreach my $af (@{$oid_files->{'assoc-files'}}) {
  • gsdl/trunk/perllib/util.pm

    r18411 r18441  
    768768}
    769769
     770sub filename_is_absolute
     771{
     772    my ($filename) = @_;
     773
     774    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
     775    return ($filename =~ m/^(\w:)?\\/);
     776    }
     777    else {
     778    return ($filename =~ m/^\//);
     779    }
     780}
     781
     782
    770783## @method make_absolute()
    771784#
     
    786799   
    787800    my ($base_dir, $dir) = @_;
    788     print STDERR "dir = $dir\n";
     801###    print STDERR "dir = $dir\n";
    789802    $dir =~ s/[\\\/]+/\//g;
    790803    $dir = $base_dir . "/$dir" unless ($dir =~ m|^(\w:)?/|);
     
    793806    $dir =~ s|/[.][.]?/|/|g;
    794807    $dir =~ tr|/|/|s;
    795     print STDERR "dir = $dir\n";
     808###    print STDERR "dir = $dir\n";
    796809   
    797810    return $dir;
Note: See TracChangeset for help on using the changeset viewer.