Changeset 10159


Ignore:
Timestamp:
2005-06-23T11:25:53+12:00 (19 years ago)
Author:
davidb
Message:

Package upgraded to support incremental building. The main change is to set
the num_docs counters etc. to those of the previous build (loaded in from
build.cfg if present). 'reset' also returns them to these pre-loaded values.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/basebuildproc.pm

    r10028 r10159  
    4141
    4242sub new {
    43     my ($class, $collection, $source_dir, $build_dir,
     43    my ($class, $collection, $source_dir, $build_dir, $keepold,
    4444    $verbosity, $outhandle) = @_;
    4545    my $self = new docproc ();
     
    5252    $self->{'collection'} = $collection;
    5353    $self->{'source_dir'} = $source_dir;
    54     $self->{'build_dir'} = $build_dir;
    55     $self->{'verbosity'} = $verbosity;
    56     $self->{'outhandle'} = $outhandle;
     54    $self->{'build_dir'}  = $build_dir;
     55    $self->{'keepold'}    = $keepold;
     56    $self->{'verbosity'}  = $verbosity;
     57    $self->{'outhandle'}  = $outhandle;
    5758
    5859    $self->{'classifiers'} = [];
     
    6465    $self->{'indexexparr'} = [];
    6566
     67    my $found_num_data = 0;
     68    my $buildconfigfile = undef;
     69
     70    if ($keepold) {
     71    # For incremental building need to seed num_docs etc from values
     72    # stored in build.cfg (if present)
     73
     74    $buildconfigfile = &util::filename_cat($build_dir, "build.cfg");
     75
     76    if (-e $buildconfigfile) {
     77        $found_num_data = 1;
     78    }
     79    else {
     80        # try the index dir
     81        $buildconfigfile = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},
     82                           "index", "build.cfg");
     83        if (-e $buildconfigfile) {
     84        $found_num_data = 1;
     85        }
     86    }
     87
     88    }
     89
     90    if ($found_num_data) {
     91    my $buildcfg = &colcfg::read_build_cfg($buildconfigfile);
     92   
     93    $self->{'starting_num_docs'}     = $buildcfg->{'numdocs'};
     94    $self->{'starting_num_sections'} = $buildcfg->{'numsections'};
     95    $self->{'starting_num_bytes'}    = $buildcfg->{'numbytes'};
     96    }
     97    else {
     98    $self->{'starting_num_docs'}     = 0;
     99    $self->{'starting_num_sections'} = 0;
     100    $self->{'starting_num_bytes'}    = 0;
     101    }
     102
    66103    $self->{'output_handle'} = "STDOUT";
    67     $self->{'num_docs'} = 0;
    68     $self->{'num_sections'} = 0;
    69     $self->{'num_bytes'} = 0;
     104    $self->{'num_docs'}      = $self->{'starting_num_docs'};
     105    $self->{'num_sections'}  = $self->{'starting_num_sections'};
     106    $self->{'num_bytes'}     = $self->{'starting_num_bytes'};
     107
    70108    $self->{'num_processed_bytes'} = 0;
    71109    $self->{'store_text'} = 1;
     
    84122sub reset {
    85123    my $self = shift (@_);
    86    
    87     $self->{'num_docs'} = 0;
    88     $self->{'num_sections'} = 0;
     124
     125    $self->{'num_docs'}      = $self->{'starting_num_docs'};
     126    $self->{'num_sections'}  = $self->{'starting_num_sections'};
     127    $self->{'num_bytes'}     = $self->{'starting_num_bytes'};
     128   
    89129    $self->{'num_processed_bytes'} = 0;
    90     $self->{'num_bytes'} = 0;
     130}
     131
     132sub zero_reset {
     133    my $self = shift (@_);
     134
     135    $self->{'num_docs'}      = 0;
     136    $self->{'num_sections'}  = 0;
     137    $self->{'num_bytes'}     = 0;
     138   
     139    $self->{'num_processed_bytes'} = 0;
    91140}
    92141
     
    132181
    133182    $self->{'mode'} = $mode;
     183}
     184
     185sub get_mode {
     186    my $self = shift (@_);
     187
     188    return $self->{'mode'};
    134189}
    135190
     
    220275    my ($doc_obj, $filename) = @_;
    221276    my $handle = $self->{'output_handle'};
     277
     278    print STDERR "***** infodb called with doc_obj = $doc_obj\n";
    222279
    223280    my $doctype = $doc_obj->get_doc_type();
Note: See TracChangeset for help on using the changeset viewer.