Ignore:
Timestamp:
2000-07-13T10:21:53+12:00 (24 years ago)
Author:
sjboddie
Message:

merged changes to trunk into New_Config_Format branch

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/New_Config_Format-branch/gsdl/perllib/mgbuilder.pm

    r1072 r1279  
    9999   
    100100    # load all the plugins
    101     $self->{'pluginfo'} = &plugin::load_plugins ($plugins);
     101    $self->{'pluginfo'} = &plugin::load_plugins ($plugins, $verbosity);
    102102    if (scalar(@{$self->{'pluginfo'}}) == 0) {
    103103    print STDERR "No plugins were loaded.\n";
     
    207207    close ($handle) unless $self->{'debug'};
    208208
     209    $self->print_stats();
     210
    209211    # create the compression dictionary
    210212    # the compression dictionary is built by assuming the stats are from a seed
     
    232234           "", {}, $self->{'buildproc'}, $self->{'maxdocs'});
    233235    close ($handle) unless $self->{'debug'};
     236
     237    $self->print_stats();
    234238 }
    235239
     
    481485    close ($handle) unless $self->{'debug'};
    482486
     487    $self->print_stats();
     488
    483489    if (!$self->{'debug'}) {
    484490    # create the perfect hash function
     
    502508           "", {}, $self->{'buildproc'}, $self->{'maxdocs'});
    503509   
     510    $self->print_stats ();
     511
    504512    if (!$self->{'debug'}) {
    505513
     
    646654    $build_cfg->{'numbytes'} = $self->{'buildproc'}->get_num_bytes();
    647655
     656    # get additional stats from mg
     657    my $exedir = "$ENV{'GSDLHOME'}/bin/$ENV{'GSDLOS'}";
     658    my $exe = &util::get_os_exe ();
     659    my $mgstat_exe = &util::filename_cat($exedir, "mgstat$exe");
     660    my $input_file = &util::filename_cat ("text", $self->{'collection'});
     661    if (!-e "$mgstat_exe" || !open (PIPEIN, "$mgstat_exe -d $self->{'build_dir'} -f $input_file |")) {
     662    print STDERR "Warning: Couldn't open pipe to $mgstat_exe to get additional stats\n";
     663    } else {
     664    my $line = "";
     665    while (defined ($line = <PIPEIN>)) {
     666        if ($line =~ /^Words in collection \[dict\]\s+:\s+(\d+)/) {
     667        ($build_cfg->{'numwords'}) = $1;
     668        } elsif ($line =~ /^Documents\s+:\s+(\d+)/) {
     669        ($build_cfg->{'numsections'}) = $1;
     670        }
     671    }
     672    close PIPEIN;
     673    }
     674
    648675    # store the mapping between the index names and the directory names
    649676    my @indexmap = ();
     
    667694    $build_cfg->{'languagemap'} = \@languagemap if scalar (@languagemap);
    668695
    669     $build_cfg->{'notbuilt'} = $self->{'notbuilt'};
     696    $build_cfg->{'notbuilt'} = $self->{'notbuilt'} if scalar @{$self->{'notbuilt'}};
    670697
    671698    # write out the build information
    672699    &cfgread::write_cfg_file("$self->{'build_dir'}/build.cfg", $build_cfg,
    673                  '^(builddate|numdocs|numbytes)$',
     700                 '^(builddate|numdocs|numbytes|numwords|numsections)$',
    674701                             '^(indexmap|subcollectionmap|languagemap|notbuilt)$');
    675702
     
    680707 }
    681708
     709 sub print_stats {
     710    my $self = shift (@_);
     711
     712    my $indexing_text = $self->{'buildproc'}->get_indexing_text();
     713    my $index = $self->{'buildproc'}->get_index();
     714    my $num_bytes = $self->{'buildproc'}->get_num_bytes();
     715    my $num_processed_bytes = $self->{'buildproc'}->get_num_processed_bytes();
     716
     717    if ($indexing_text) {
     718    print STDERR "Stats (Creating index $index)\n";
     719    } else {
     720    print STDERR "Stats (Compressing text from $index)\n";
     721    }
     722    print STDERR "Total bytes in collection: $num_bytes\n";
     723    print STDERR "Total bytes in $index: $num_processed_bytes\n";
     724
     725    if ($num_processed_bytes < 50) {
     726    print STDERR "***************\n";
     727    print STDERR "WARNING: There is very little or no text to process for $index\n";
     728    if ($indexing_text) {
     729        print STDERR "This may cause an error while attempting to build the index\n";
     730    } else {
     731        print STDERR "This may cause an error while attempting to compress the text\n";
     732    }
     733    print STDERR "***************\n";
     734    }
     735 }
    682736
    683737 1;
Note: See TracChangeset for help on using the changeset viewer.